source: src/main/java/weka/experiment/Experiment.java @ 10

Last change on this file since 10 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 34.5 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Experiment.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.experiment;
25
26import weka.core.AdditionalMeasureProducer;
27import weka.core.FastVector;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.RevisionHandler;
32import weka.core.RevisionUtils;
33import weka.core.Utils;
34import weka.core.converters.AbstractFileLoader;
35import weka.core.converters.ConverterUtils;
36import weka.core.xml.KOML;
37import weka.core.xml.XMLOptions;
38import weka.experiment.xml.XMLExperiment;
39
40import java.beans.PropertyDescriptor;
41import java.io.BufferedInputStream;
42import java.io.BufferedOutputStream;
43import java.io.File;
44import java.io.FileInputStream;
45import java.io.FileOutputStream;
46import java.io.ObjectInputStream;
47import java.io.ObjectOutputStream;
48import java.io.Serializable;
49import java.lang.reflect.Array;
50import java.lang.reflect.Method;
51import java.util.Enumeration;
52import java.util.Vector;
53
54import javax.swing.DefaultListModel;
55
56/**
57 * Holds all the necessary configuration information for a standard
58 * type experiment. This object is able to be serialized for storage
59 * on disk.
60 *
61 <!-- options-start -->
62 * Valid options are: <p/>
63 *
64 * <pre> -L &lt;num&gt;
65 *  The lower run number to start the experiment from.
66 *  (default 1)</pre>
67 *
68 * <pre> -U &lt;num&gt;
69 *  The upper run number to end the experiment at (inclusive).
70 *  (default 10)</pre>
71 *
72 * <pre> -T &lt;arff file&gt;
73 *  The dataset to run the experiment on.
74 *  (required, may be specified multiple times)</pre>
75 *
76 * <pre> -P &lt;class name&gt;
77 *  The full class name of a ResultProducer (required).
78 *  eg: weka.experiment.RandomSplitResultProducer</pre>
79 *
80 * <pre> -D &lt;class name&gt;
81 *  The full class name of a ResultListener (required).
82 *  eg: weka.experiment.CSVResultListener</pre>
83 *
84 * <pre> -N &lt;string&gt;
85 *  A string containing any notes about the experiment.
86 *  (default none)</pre>
87 *
88 * <pre>
89 * Options specific to result producer weka.experiment.RandomSplitResultProducer:
90 * </pre>
91 *
92 * <pre> -P &lt;percent&gt;
93 *  The percentage of instances to use for training.
94 *  (default 66)</pre>
95 *
96 * <pre> -D
97 * Save raw split evaluator output.</pre>
98 *
99 * <pre> -O &lt;file/directory name/path&gt;
100 *  The filename where raw output will be stored.
101 *  If a directory name is specified then then individual
102 *  outputs will be gzipped, otherwise all output will be
103 *  zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
104 *
105 * <pre> -W &lt;class name&gt;
106 *  The full class name of a SplitEvaluator.
107 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
108 *
109 * <pre> -R
110 *  Set when data is not to be randomized and the data sets' size.
111 *  Is not to be determined via probabilistic rounding.</pre>
112 *
113 * <pre>
114 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
115 * </pre>
116 *
117 * <pre> -W &lt;class name&gt;
118 *  The full class name of the classifier.
119 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
120 *
121 * <pre> -C &lt;index&gt;
122 *  The index of the class for which IR statistics
123 *  are to be output. (default 1)</pre>
124 *
125 * <pre> -I &lt;index&gt;
126 *  The index of an attribute to output in the
127 *  results. This attribute should identify an
128 *  instance in order to know which instances are
129 *  in the test set of a cross validation. if 0
130 *  no output (default 0).</pre>
131 *
132 * <pre> -P
133 *  Add target and prediction columns to the result
134 *  for each fold.</pre>
135 *
136 * <pre>
137 * Options specific to classifier weka.classifiers.rules.ZeroR:
138 * </pre>
139 *
140 * <pre> -D
141 *  If set, classifier is run in debug mode and
142 *  may output additional info to the console</pre>
143 *
144 <!-- options-end -->
145 *
146 * All options after -- will be passed to the result producer. <p>
147 *
148 * @author Len Trigg (trigg@cs.waikato.ac.nz)
149 * @version $Revision: 5399 $
150 */
151public class Experiment 
152  implements Serializable, OptionHandler, RevisionHandler {
153 
  /** for serialization */
  static final long serialVersionUID = 44945596742646663L;
 
  /** The filename extension that should be used for experiment files */
  public static String FILE_EXTENSION = ".exp";

  /** Where results will be sent; handed to the result producer by initialize() */
  protected ResultListener m_ResultListener = new InstancesResultListener();
 
  /** The result producer whose doRun() is called once per iteration */
  protected ResultProducer m_ResultProducer = new RandomSplitResultProducer();

  /** Lower run number (first run carried out) */
  protected int m_RunLower = 1;

  /** Upper run number (last run carried out, inclusive) */
  protected int m_RunUpper = 10;

  /** The dataset files; the elements are read back as java.io.File objects */
  protected DefaultListModel m_Datasets = new DefaultListModel();

  /** True if the exp should also iterate over a property of the RP */
  protected boolean m_UsePropertyIterator = false;
 
  /** The path of properties leading from the RP to the iterated property */
  protected PropertyNode [] m_PropertyPath;
 
  /** The array of values to set the property to; held as a plain Object
      and accessed through java.lang.reflect.Array */
  protected Object m_PropertyArray;

  /** User notes about the experiment */
  protected String m_Notes = "";

  /** Method names of additional measures of objects contained in the
      custom property iterator. Only method names beginning with "measure"
      and returning doubles are recognised */
  protected String [] m_AdditionalMeasures = null;

  /** True if the class attribute is the first attribute for all
      datasets involved in this experiment. */
  protected boolean m_ClassFirst = false;

  /** If true an experiment will advance the current data set before
      any custom iterator */
  protected boolean m_AdvanceDataSetFirst = true;
199
200  /**
201   * Sets whether the first attribute is treated as the class
202   * for all datasets involved in the experiment. This information
203   * is not output with the result of the experiments!
204   *
205   * @param flag        whether the class attribute is the first and not the last
206   */
207  public void classFirst(boolean flag) {
208   
209    m_ClassFirst = flag;
210  }
211 
212  /**
213   * Get the value of m_DataSetFirstFirst.
214   *
215   * @return Value of m_DataSetFirstFirst.
216   */
217  public boolean getAdvanceDataSetFirst() {
218   
219    return m_AdvanceDataSetFirst;
220  }
221 
222  /**
223   * Set the value of m_AdvanceDataSetFirst.
224   *
225   * @param newAdvanceDataSetFirst Value to assign to m_AdvanceRunFirst.
226   */
227  public void setAdvanceDataSetFirst(boolean newAdvanceDataSetFirst) {
228   
229    m_AdvanceDataSetFirst = newAdvanceDataSetFirst;
230  }
231 
232  /**
233   * Gets whether the custom property iterator should be used.
234   *
235   * @return true if so
236   */
237  public boolean getUsePropertyIterator() {
238   
239    return m_UsePropertyIterator;
240  }
241
242  /**
243   * Sets whether the custom property iterator should be used.
244   *
245   * @param newUsePropertyIterator true if so
246   */
247  public void setUsePropertyIterator(boolean newUsePropertyIterator) {
248   
249    m_UsePropertyIterator = newUsePropertyIterator;
250  }
251
252  /**
253   * Gets the path of properties taken to get to the custom property
254   * to iterate over.
255   *
256   * @return an array of PropertyNodes
257   */
258  public PropertyNode [] getPropertyPath() {
259   
260    return m_PropertyPath;
261  }
262 
263  /**
264   * Sets the path of properties taken to get to the custom property
265   * to iterate over.
266   *
267   * @param newPropertyPath an array of PropertyNodes
268   */
269  public void setPropertyPath(PropertyNode [] newPropertyPath) {
270   
271    m_PropertyPath = newPropertyPath;
272  }
273 
274  /**
275   * Sets the array of values to set the custom property to.
276   *
277   * @param newPropArray a value of type Object which should be an
278   * array of the appropriate values.
279   */
280  public void setPropertyArray(Object newPropArray) {
281
282    m_PropertyArray = newPropArray;
283  }
284
285  /**
286   * Gets the array of values to set the custom property to.
287   *
288   * @return a value of type Object which should be an
289   * array of the appropriate values.
290   */
291  public Object getPropertyArray() {
292
293    return m_PropertyArray;
294  }
295
296  /**
297   * Gets the number of custom iterator values that have been defined
298   * for the experiment.
299   *
300   * @return the number of custom property iterator values.
301   */
302  public int getPropertyArrayLength() {
303
304    return Array.getLength(m_PropertyArray);
305  }
306
307  /**
308   * Gets a specified value from the custom property iterator array.
309   *
310   * @param index the index of the value wanted
311   * @return the property array value
312   */
313  public Object getPropertyArrayValue(int index) {
314
315    return Array.get(m_PropertyArray, index);
316  }
317 
  /* These may potentially want to be made un-transient if it is decided
   * that experiments may be saved mid-run and later resumed
   */
  /** The current run number when the experiment is running */
  protected transient int m_RunNumber;
  /** The current dataset number (index into the dataset list) when the
      experiment is running */
  protected transient int m_DatasetNumber;
  /** The current custom property value index when the experiment is running */
  protected transient int m_PropertyNumber;
  /** True if the experiment has finished running; cleared by initialize()
      and set again by advanceCounters() once all iterations are done */
  protected transient boolean m_Finished = true;
  /** The dataset currently being used; null means the next iteration
      has to load the dataset at m_DatasetNumber */
  protected transient Instances m_CurrentInstances;
  /** The index of the custom property value that has actually been set
      on the result producer; initialize() resets it to -1 */
  protected transient int m_CurrentProperty;
333
334  /**
335   * When an experiment is running, this returns the current run number.
336   *
337   * @return the current run number.
338   */
339  public int getCurrentRunNumber() {
340    return m_RunNumber;
341  }
342
343  /**
344   * When an experiment is running, this returns the current dataset number.
345   *
346   * @return the current dataset number.
347   */
348  public int getCurrentDatasetNumber() {
349    return m_DatasetNumber;
350  }
351
352  /**
353   * When an experiment is running, this returns the index of the
354   * current custom property value.
355   *
356   * @return the index of the current custom property value.
357   */
358  public int getCurrentPropertyNumber() {
359    return m_PropertyNumber;
360  }
361 
362  /**
363   * Prepares an experiment for running, initializing current iterator
364   * settings.
365   *
366   * @throws Exception if an error occurs
367   */
368  public void initialize() throws Exception {
369   
370    m_RunNumber = getRunLower();
371    m_DatasetNumber = 0;
372    m_PropertyNumber = 0;
373    m_CurrentProperty = -1;
374    m_CurrentInstances = null;
375    m_Finished = false;
376    if (m_UsePropertyIterator && (m_PropertyArray == null)) {
377      throw new Exception("Null array for property iterator");
378    }
379    if (getRunLower() > getRunUpper()) {
380      throw new Exception("Lower run number is greater than upper run number");
381    }
382    if (getDatasets().size() == 0) {
383      throw new Exception("No datasets have been specified");
384    }
385    if (m_ResultProducer == null) {
386      throw new Exception("No ResultProducer set");
387    }
388    if (m_ResultListener == null) {
389      throw new Exception("No ResultListener set");
390    }
391
392    //    if (m_UsePropertyIterator && (m_PropertyArray != null)) {
393    determineAdditionalResultMeasures();
394      //    }
395
396    m_ResultProducer.setResultListener(m_ResultListener);
397    m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
398    m_ResultProducer.preProcess();
399
400    // constrain the additional measures to be only those allowable
401    // by the ResultListener
402    String [] columnConstraints = m_ResultListener.
403      determineColumnConstraints(m_ResultProducer);
404
405    if (columnConstraints != null) {
406      m_ResultProducer.setAdditionalMeasures(columnConstraints);
407    }
408  }
409
410  /**
411   * Iterate over the objects in the property array to determine what
412   * (if any) additional measures they support
413   *
414   * @throws Exception  if additional measures don't comply to the naming
415   *                    convention (starting with "measure")
416   */
417  private void determineAdditionalResultMeasures() throws Exception {
418    m_AdditionalMeasures = null;
419    FastVector measureNames = new FastVector();
420
421    // first try the result producer, then property array if applicable
422    if (m_ResultProducer instanceof AdditionalMeasureProducer) {
423      Enumeration am = ((AdditionalMeasureProducer)m_ResultProducer).
424        enumerateMeasures();
425      while (am.hasMoreElements()) {
426        String mname = (String)am.nextElement();
427        if (mname.startsWith("measure")) {
428          if (measureNames.indexOf(mname) == -1) {
429            measureNames.addElement(mname);
430          }
431        } else {
432          throw new Exception ("Additional measures in "
433                               + m_ResultProducer.getClass().getName()
434                               +" must obey the naming convention"
435                               +" of starting with \"measure\"");
436        }
437      }
438    }
439
440    if (m_UsePropertyIterator && (m_PropertyArray != null)) {
441      for (int i = 0; i < Array.getLength(m_PropertyArray); i++) {
442        Object current = Array.get(m_PropertyArray, i);
443
444        if (current instanceof AdditionalMeasureProducer) {
445          Enumeration am = ((AdditionalMeasureProducer)current).
446            enumerateMeasures();
447          while (am.hasMoreElements()) {
448            String mname = (String)am.nextElement();
449            if (mname.startsWith("measure")) {
450              if (measureNames.indexOf(mname) == -1) {
451                measureNames.addElement(mname);
452              }
453            } else {
454              throw new Exception ("Additional measures in "
455                                   + current.getClass().getName()
456                                   +" must obey the naming convention"
457                                   +" of starting with \"measure\"");
458            }
459          }
460        }
461      }
462    }
463    if (measureNames.size() > 0) {
464      m_AdditionalMeasures = new String [measureNames.size()];
465      for (int i=0;i<measureNames.size();i++) {
466        m_AdditionalMeasures[i] = (String)measureNames.elementAt(i);
467      }
468    }
469  }
470
471 
472  /**
473   * Recursively sets the custom property value, by setting all values
474   * along the property path.
475   *
476   * @param propertyDepth the current position along the property path
477   * @param origValue the value to set the property to
478   * @throws Exception if an error occurs
479   */
480  protected void setProperty(int propertyDepth, Object origValue)
481    throws Exception {
482   
483    PropertyDescriptor current = m_PropertyPath[propertyDepth].property;
484    Object subVal = null;
485    if (propertyDepth < m_PropertyPath.length - 1) {
486      Method getter = current.getReadMethod();
487      Object getArgs [] = { };
488      subVal = getter.invoke(origValue, getArgs);
489      setProperty(propertyDepth + 1, subVal);
490    } else {
491      subVal = Array.get(m_PropertyArray, m_PropertyNumber);
492    }
493    Method setter = current.getWriteMethod();
494    Object [] args = { subVal };
495    setter.invoke(origValue, args);
496  }
497
498  /**
499   * Returns true if there are more iterations to carry out in the experiment.
500   *
501   * @return true if so
502   */
503  public boolean hasMoreIterations() {
504
505    return !m_Finished;
506  }
507 
508  /**
509   * Carries out the next iteration of the experiment.
510   *
511   * @throws Exception if an error occurs
512   */
513  public void nextIteration() throws Exception {
514   
515    if (m_UsePropertyIterator) {
516      if (m_CurrentProperty != m_PropertyNumber) {
517        setProperty(0, m_ResultProducer);
518        m_CurrentProperty = m_PropertyNumber;
519      }
520    }
521   
522    if (m_CurrentInstances == null) {
523      File currentFile = (File) getDatasets().elementAt(m_DatasetNumber);
524      AbstractFileLoader loader = ConverterUtils.getLoaderForFile(currentFile);
525      loader.setFile(currentFile);
526      Instances data = new Instances(loader.getDataSet());
527      // only set class attribute if not already done by loader
528      if (data.classIndex() == -1) {
529        if (m_ClassFirst) {
530          data.setClassIndex(0);
531        } else {
532          data.setClassIndex(data.numAttributes() - 1);
533        }
534      }
535      m_CurrentInstances = data;
536      m_ResultProducer.setInstances(m_CurrentInstances);
537    }
538   
539    m_ResultProducer.doRun(m_RunNumber);
540
541    advanceCounters();
542  }
543
544  /**
545   * Increments iteration counters appropriately.
546   */
547  public void advanceCounters() {
548
549    if (m_AdvanceDataSetFirst) {
550      m_RunNumber ++;
551      if (m_RunNumber > getRunUpper()) {
552        m_RunNumber = getRunLower();
553        m_DatasetNumber ++;
554        m_CurrentInstances = null;
555        if (m_DatasetNumber >= getDatasets().size()) {
556          m_DatasetNumber = 0;
557          if (m_UsePropertyIterator) {
558            m_PropertyNumber ++;
559            if (m_PropertyNumber >= Array.getLength(m_PropertyArray)) {
560              m_Finished = true;
561            }
562          } else {
563            m_Finished = true;
564          }
565        }
566      }
567    } else { // advance by custom iterator before data set
568      m_RunNumber ++;
569      if (m_RunNumber > getRunUpper()) {
570        m_RunNumber = getRunLower();
571        if (m_UsePropertyIterator) {
572          m_PropertyNumber ++;
573          if (m_PropertyNumber >= Array.getLength(m_PropertyArray)) {
574            m_PropertyNumber = 0;
575            m_DatasetNumber ++;
576            m_CurrentInstances = null;
577            if (m_DatasetNumber >= getDatasets().size()) {
578              m_Finished = true;
579            } 
580          }
581        } else {
582          m_DatasetNumber ++;
583          m_CurrentInstances = null;
584          if (m_DatasetNumber >= getDatasets().size()) {
585            m_Finished = true;
586          }
587        }
588      }
589    }
590  }
591
592  /**
593   * Runs all iterations of the experiment, continuing past errors.
594   */
595  public void runExperiment() {
596
597    while (hasMoreIterations()) {
598      try {
599        nextIteration();
600      } catch (Exception ex) {
601        ex.printStackTrace();
602        System.err.println(ex.getMessage());
603        advanceCounters(); // Try to keep plowing through
604      }
605    }
606  }
607
608  /**
609   * Signals that the experiment is finished running, so that cleanup
610   * can be done.
611   *
612   * @throws Exception if an error occurs
613   */
614  public void postProcess() throws Exception {
615
616    m_ResultProducer.postProcess();
617  }
618 
619  /**
620   * Gets the datasets in the experiment.
621   *
622   * @return the datasets in the experiment.
623   */
624  public DefaultListModel getDatasets() {
625    return m_Datasets;
626  }
627
628  /**
629   * Set the datasets to use in the experiment
630   * @param ds the list of datasets to use
631   */
632  public void setDatasets(DefaultListModel ds) {
633    m_Datasets = ds;
634  }
635
636  /**
637   * Gets the result listener where results will be sent.
638   *
639   * @return the result listener where results will be sent.
640   */
641  public ResultListener getResultListener() {
642   
643    return m_ResultListener;
644  }
645 
646  /**
647   * Sets the result listener where results will be sent.
648   *
649   * @param newResultListener the result listener where results will be sent.
650   */
651  public void setResultListener(ResultListener newResultListener) {
652   
653    m_ResultListener = newResultListener;
654  }
655 
656  /**
657   * Get the result producer used for the current experiment.
658   *
659   * @return the result producer used for the current experiment.
660   */
661  public ResultProducer getResultProducer() {
662   
663    return m_ResultProducer;
664  }
665 
666  /**
667   * Set the result producer used for the current experiment.
668   *
669   * @param newResultProducer result producer to use for the current
670   * experiment.
671   */
672  public void setResultProducer(ResultProducer newResultProducer) {
673   
674    m_ResultProducer = newResultProducer;
675  }
676 
677  /**
678   * Get the upper run number for the experiment.
679   *
680   * @return the upper run number for the experiment.
681   */
682  public int getRunUpper() {
683   
684    return m_RunUpper;
685  }
686 
687  /**
688   * Set the upper run number for the experiment.
689   *
690   * @param newRunUpper the upper run number for the experiment.
691   */
692  public void setRunUpper(int newRunUpper) {
693   
694    m_RunUpper = newRunUpper;
695  }
696 
697  /**
698   * Get the lower run number for the experiment.
699   *
700   * @return the lower run number for the experiment.
701   */
702  public int getRunLower() {
703   
704    return m_RunLower;
705  }
706 
707  /**
708   * Set the lower run number for the experiment.
709   *
710   * @param newRunLower the lower run number for the experiment.
711   */
712  public void setRunLower(int newRunLower) {
713   
714    m_RunLower = newRunLower;
715  }
716
717 
718  /**
719   * Get the user notes.
720   *
721   * @return User notes associated with the experiment.
722   */
723  public String getNotes() {
724   
725    return m_Notes;
726  }
727 
728  /**
729   * Set the user notes.
730   *
731   * @param newNotes New user notes.
732   */
733  public void setNotes(String newNotes) {
734   
735    m_Notes = newNotes;
736  }
737 
738  /**
739   * Returns an enumeration describing the available options..
740   *
741   * @return an enumeration of all the available options.
742   */
743  public Enumeration listOptions() {
744
745    Vector newVector = new Vector(6);
746
747    newVector.addElement(new Option(
748             "\tThe lower run number to start the experiment from.\n"
749              +"\t(default 1)", 
750             "L", 1, 
751             "-L <num>"));
752    newVector.addElement(new Option(
753             "\tThe upper run number to end the experiment at (inclusive).\n"
754              +"\t(default 10)", 
755             "U", 1, 
756             "-U <num>"));
757    newVector.addElement(new Option(
758             "\tThe dataset to run the experiment on.\n"
759             + "\t(required, may be specified multiple times)", 
760             "T", 1, 
761             "-T <arff file>"));
762    newVector.addElement(new Option(
763             "\tThe full class name of a ResultProducer (required).\n"
764              +"\teg: weka.experiment.RandomSplitResultProducer", 
765             "P", 1, 
766             "-P <class name>"));
767    newVector.addElement(new Option(
768             "\tThe full class name of a ResultListener (required).\n"
769              +"\teg: weka.experiment.CSVResultListener", 
770             "D", 1, 
771             "-D <class name>"));
772    newVector.addElement(new Option(
773             "\tA string containing any notes about the experiment.\n"
774              +"\t(default none)", 
775             "N", 1, 
776             "-N <string>"));
777
778    if ((m_ResultProducer != null) &&
779        (m_ResultProducer instanceof OptionHandler)) {
780      newVector.addElement(new Option(
781             "",
782             "", 0, "\nOptions specific to result producer "
783             + m_ResultProducer.getClass().getName() + ":"));
784      Enumeration enm = ((OptionHandler)m_ResultProducer).listOptions();
785      while (enm.hasMoreElements()) {
786        newVector.addElement(enm.nextElement());
787      }
788    }
789    return newVector.elements();
790  }
791
792  /**
793   * Parses a given list of options. <p/>
794   *
795   <!-- options-start -->
796   * Valid options are: <p/>
797   *
798   * <pre> -L &lt;num&gt;
799   *  The lower run number to start the experiment from.
800   *  (default 1)</pre>
801   *
802   * <pre> -U &lt;num&gt;
803   *  The upper run number to end the experiment at (inclusive).
804   *  (default 10)</pre>
805   *
806   * <pre> -T &lt;arff file&gt;
807   *  The dataset to run the experiment on.
808   *  (required, may be specified multiple times)</pre>
809   *
810   * <pre> -P &lt;class name&gt;
811   *  The full class name of a ResultProducer (required).
812   *  eg: weka.experiment.RandomSplitResultProducer</pre>
813   *
814   * <pre> -D &lt;class name&gt;
815   *  The full class name of a ResultListener (required).
816   *  eg: weka.experiment.CSVResultListener</pre>
817   *
818   * <pre> -N &lt;string&gt;
819   *  A string containing any notes about the experiment.
820   *  (default none)</pre>
821   *
822   * <pre>
823   * Options specific to result producer weka.experiment.RandomSplitResultProducer:
824   * </pre>
825   *
826   * <pre> -P &lt;percent&gt;
827   *  The percentage of instances to use for training.
828   *  (default 66)</pre>
829   *
830   * <pre> -D
831   * Save raw split evaluator output.</pre>
832   *
833   * <pre> -O &lt;file/directory name/path&gt;
834   *  The filename where raw output will be stored.
835   *  If a directory name is specified then then individual
836   *  outputs will be gzipped, otherwise all output will be
837   *  zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
838   *
839   * <pre> -W &lt;class name&gt;
840   *  The full class name of a SplitEvaluator.
841   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
842   *
843   * <pre> -R
844   *  Set when data is not to be randomized and the data sets' size.
845   *  Is not to be determined via probabilistic rounding.</pre>
846   *
847   * <pre>
848   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
849   * </pre>
850   *
851   * <pre> -W &lt;class name&gt;
852   *  The full class name of the classifier.
853   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
854   *
855   * <pre> -C &lt;index&gt;
856   *  The index of the class for which IR statistics
857   *  are to be output. (default 1)</pre>
858   *
859   * <pre> -I &lt;index&gt;
860   *  The index of an attribute to output in the
861   *  results. This attribute should identify an
862   *  instance in order to know which instances are
863   *  in the test set of a cross validation. if 0
864   *  no output (default 0).</pre>
865   *
866   * <pre> -P
867   *  Add target and prediction columns to the result
868   *  for each fold.</pre>
869   *
870   * <pre>
871   * Options specific to classifier weka.classifiers.rules.ZeroR:
872   * </pre>
873   *
874   * <pre> -D
875   *  If set, classifier is run in debug mode and
876   *  may output additional info to the console</pre>
877   *
878   <!-- options-end -->
879   *
880   * All options after -- will be passed to the result producer. <p>
881   *
882   * @param options the list of options as an array of strings
883   * @throws Exception if an option is not supported
884   */
885  public void setOptions(String [] options) throws Exception {
886
887    String lowerString = Utils.getOption('L', options);
888    if (lowerString.length() != 0) {
889      setRunLower(Integer.parseInt(lowerString));
890    } else {
891      setRunLower(1);
892    }
893    String upperString = Utils.getOption('U', options);
894    if (upperString.length() != 0) {
895      setRunUpper(Integer.parseInt(upperString));
896    } else {
897      setRunUpper(10);
898    }
899    if (getRunLower() > getRunUpper()) {
900      throw new Exception("Lower (" + getRunLower() 
901                          + ") is greater than upper (" 
902                          + getRunUpper() + ")");
903    }
904   
905    setNotes(Utils.getOption('N', options));
906   
907    getDatasets().removeAllElements();
908    String dataName;
909    do {
910      dataName = Utils.getOption('T', options);
911      if (dataName.length() != 0) {
912        File dataset = new File(dataName);
913        getDatasets().addElement(dataset);
914      }
915    } while (dataName.length() != 0);
916    if (getDatasets().size() == 0) {
917      throw new Exception("Required: -T <arff file name>");
918    }
919
920    String rlName = Utils.getOption('D', options);
921    if (rlName.length() == 0) {
922      throw new Exception("Required: -D <ResultListener class name>");
923    }
924    rlName = rlName.trim();
925    // split off any options
926    int breakLoc = rlName.indexOf(' ');
927    String clName = rlName;
928    String rlOptionsString = "";
929    String [] rlOptions = null;
930    if (breakLoc != -1) {
931      clName = rlName.substring(0, breakLoc);
932      rlOptionsString = rlName.substring(breakLoc).trim();
933      rlOptions = Utils.splitOptions(rlOptionsString);
934    }
935    setResultListener((ResultListener)Utils.forName(ResultListener.class,
936                                                    clName, rlOptions));
937
938    String rpName = Utils.getOption('P', options);
939    if (rpName.length() == 0) {
940      throw new Exception("Required: -P <ResultProducer class name>");
941    }
942    // Do it first without options, so if an exception is thrown during
943    // the option setting, listOptions will contain options for the actual
944    // RP.
945    //GHF -- nice idea, but it prevents you from using result producers that
946    //       have *required* parameters
947    setResultProducer((ResultProducer)Utils.forName(
948                      ResultProducer.class,
949                      rpName,
950                      Utils.partitionOptions(options) )); //GHF
951    //GHF if (getResultProducer() instanceof OptionHandler) {
952    //GHF  ((OptionHandler) getResultProducer())
953    //GHF  .setOptions(Utils.partitionOptions(options));
954    //GHF }
955  }
956
957  /**
958   * Gets the current settings of the experiment iterator.
959   *
960   * @return an array of strings suitable for passing to setOptions
961   */
962  public String [] getOptions() {
963
964    // Currently no way to set custompropertyiterators from the command line
965
966    m_UsePropertyIterator = false;
967    m_PropertyPath = null;
968    m_PropertyArray = null;
969   
970    String [] rpOptions = new String [0];
971    if ((m_ResultProducer != null) && 
972        (m_ResultProducer instanceof OptionHandler)) {
973      rpOptions = ((OptionHandler)m_ResultProducer).getOptions();
974    }
975   
976    String [] options = new String [rpOptions.length 
977                                   + getDatasets().size() * 2
978                                   + 11];
979    int current = 0;
980
981    options[current++] = "-L"; options[current++] = "" + getRunLower();
982    options[current++] = "-U"; options[current++] = "" + getRunUpper();
983    if (getDatasets().size() != 0) {
984      for (int i = 0; i < getDatasets().size(); i++) {
985        options[current++] = "-T";
986        options[current++] = getDatasets().elementAt(i).toString();
987      }
988    }
989    if (getResultListener() != null) {
990      options[current++] = "-D";
991      options[current++] = getResultListener().getClass().getName();
992    }
993    if (getResultProducer() != null) {
994      options[current++] = "-P";
995      options[current++] = getResultProducer().getClass().getName();
996    }
997    if (!getNotes().equals("")) {
998      options[current++] = "-N"; options[current++] = getNotes();
999    }
1000    options[current++] = "--";
1001
1002    System.arraycopy(rpOptions, 0, options, current, 
1003                     rpOptions.length);
1004    current += rpOptions.length;
1005    while (current < options.length) {
1006      options[current++] = "";
1007    }
1008    return options;
1009  }
1010
1011  /**
1012   * Gets a string representation of the experiment configuration.
1013   *
1014   * @return a value of type 'String'
1015   */
1016  public String toString() {
1017
1018    String result = "Runs from: " + m_RunLower + " to: " + m_RunUpper + '\n';
1019    result += "Datasets:";
1020    for (int i = 0; i < m_Datasets.size(); i ++) {
1021      result += " " + m_Datasets.elementAt(i);
1022    }
1023    result += '\n';
1024    result += "Custom property iterator: "
1025      + (m_UsePropertyIterator ? "on" : "off")
1026      + "\n";
1027    if (m_UsePropertyIterator) {
1028      if (m_PropertyPath == null) {
1029        throw new Error("*** null propertyPath ***");
1030      }
1031      if (m_PropertyArray == null) {
1032        throw new Error("*** null propertyArray ***");
1033      }
1034      if (m_PropertyPath.length > 1) {
1035        result += "Custom property path:\n";
1036        for (int i = 0; i < m_PropertyPath.length - 1; i++) {
1037          PropertyNode pn = m_PropertyPath[i];
1038          result += "" + (i + 1) + "  " + pn.parentClass.getName()
1039            + "::" + pn.toString()
1040            + ' ' + pn.value.toString() + '\n';
1041        }
1042      }
1043      result += "Custom property name:"
1044        + m_PropertyPath[m_PropertyPath.length - 1].toString() + '\n';
1045      result += "Custom property values:\n";
1046      for (int i = 0; i < Array.getLength(m_PropertyArray); i++) {
1047        Object current = Array.get(m_PropertyArray, i);
1048        result += " " + (i + 1)
1049          + " " + current.getClass().getName()
1050          + " " + current.toString() + '\n';
1051      }
1052    }
1053    result += "ResultProducer: " + m_ResultProducer + '\n';
1054    result += "ResultListener: " + m_ResultListener + '\n';
1055    if (!getNotes().equals("")) {
1056      result += "Notes: " + getNotes();
1057    }
1058    return result;
1059  }
1060
1061  /**
1062   * Loads an experiment from a file.
1063   *
1064   * @param filename    the file to load the experiment from
1065   * @return            the experiment
1066   * @throws Exception  if loading fails
1067   */
1068  public static Experiment read(String filename) throws Exception {
1069    Experiment  result;
1070   
1071    // KOML?
1072    if ( (KOML.isPresent()) && (filename.toLowerCase().endsWith(KOML.FILE_EXTENSION)) ) {
1073      result = (Experiment) KOML.read(filename);
1074    }
1075    // XML?
1076    else if (filename.toLowerCase().endsWith(".xml")) {
1077      XMLExperiment xml = new XMLExperiment(); 
1078      result = (Experiment) xml.read(filename);
1079    }
1080    // binary
1081    else {
1082      FileInputStream fi = new FileInputStream(filename);
1083      ObjectInputStream oi = new ObjectInputStream(
1084          new BufferedInputStream(fi));
1085      result = (Experiment)oi.readObject();
1086      oi.close();
1087    }
1088   
1089    return result;
1090  }
1091 
1092  /**
1093   * Writes the experiment to disk.
1094   *
1095   * @param filename    the file to write to
1096   * @param exp         the experiment to save
1097   * @throws Exception  if writing fails
1098   */
1099  public static void write(String filename, Experiment exp) throws Exception {
1100    // KOML?
1101    if ( (KOML.isPresent()) && (filename.toLowerCase().endsWith(KOML.FILE_EXTENSION)) ) {
1102      KOML.write(filename, exp);
1103    }
1104    // XML?
1105    else if (filename.toLowerCase().endsWith(".xml")) {
1106      XMLExperiment xml = new XMLExperiment(); 
1107      xml.write(filename, exp);
1108    }
1109    // binary
1110    else {
1111      FileOutputStream fo = new FileOutputStream(filename);
1112      ObjectOutputStream oo = new ObjectOutputStream(
1113          new BufferedOutputStream(fo));
1114      oo.writeObject(exp);
1115      oo.close();
1116    }
1117  }
1118 
1119  /**
1120   * Configures/Runs the Experiment from the command line.
1121   *
1122   * @param args command line arguments to the Experiment.
1123   */
1124  public static void main(String[] args) {
1125
1126    try {
1127      Experiment exp = null;
1128      // get options from XML?
1129      String xmlOption = Utils.getOption("xml", args);
1130      if (!xmlOption.equals(""))
1131         args = new XMLOptions(xmlOption).toArray();
1132     
1133      String expFile = Utils.getOption('l', args);
1134      String saveFile = Utils.getOption('s', args);
1135      boolean runExp = Utils.getFlag('r', args);
1136      if (expFile.length() == 0) {
1137        exp = new Experiment();
1138        try {
1139          exp.setOptions(args);
1140          Utils.checkForRemainingOptions(args);
1141        } catch (Exception ex) {
1142          ex.printStackTrace();
1143          String result = "Usage:\n\n"
1144            + "-l <exp|xml file>\n"
1145            + "\tLoad experiment from file (default use cli options).\n"
1146      + "\tThe type is determined, based on the extension (" 
1147        + FILE_EXTENSION + " or .xml)\n"
1148            + "-s <exp|xml file>\n"
1149            + "\tSave experiment to file after setting other options.\n"
1150      + "\tThe type is determined, based on the extension (" 
1151        + FILE_EXTENSION + " or .xml)\n"
1152            + "\t(default don't save)\n"
1153            + "-r\n"
1154            + "\tRun experiment (default don't run)\n"
1155            + "-xml <filename | xml-string>\n"
1156            + "\tget options from XML-Data instead from parameters\n"
1157            + "\n";
1158          Enumeration enm = ((OptionHandler)exp).listOptions();
1159          while (enm.hasMoreElements()) {
1160            Option option = (Option) enm.nextElement();
1161            result += option.synopsis() + "\n";
1162            result += option.description() + "\n";
1163          }
1164          throw new Exception(result + "\n" + ex.getMessage());
1165        }
1166      } else {
1167        exp = read(expFile);
1168
1169        // allow extra datasets to be added to pre-loaded experiment from command line
1170        String dataName;
1171        do {
1172          dataName = Utils.getOption('T', args);
1173          if (dataName.length() != 0) {
1174            File dataset = new File(dataName);
1175            exp.getDatasets().addElement(dataset);
1176          }
1177        } while (dataName.length() != 0);
1178       
1179      }
1180      System.err.println("Experiment:\n" + exp.toString());
1181
1182      if (saveFile.length() != 0)
1183        write(saveFile, exp);
1184     
1185      if (runExp) {
1186        System.err.println("Initializing...");
1187        exp.initialize();
1188        System.err.println("Iterating...");
1189        exp.runExperiment();
1190        System.err.println("Postprocessing...");
1191        exp.postProcess();
1192      }
1193     
1194    } catch (Exception ex) {
1195      System.err.println(ex.getMessage());
1196    }
1197  }
1198 
1199  /**
1200   * Returns the revision string.
1201   *
1202   * @return            the revision
1203   */
1204  public String getRevision() {
1205    return RevisionUtils.extract("$Revision: 5399 $");
1206  }
1207} // Experiment
Note: See TracBrowser for help on using the repository browser.