source: src/main/java/weka/experiment/DatabaseResultProducer.java @ 12

Last change on this file since 12 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 22.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    DatabaseResultProducer.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.experiment;
25
26import weka.core.AdditionalMeasureProducer;
27import weka.core.Instances;
28import weka.core.Option;
29import weka.core.OptionHandler;
30import weka.core.RevisionUtils;
31import weka.core.Utils;
32
33import java.util.Enumeration;
34import java.util.Vector;
35
36/**
37 <!-- globalinfo-start -->
38 * Examines a database and extracts out the results produced by the specified ResultProducer and submits them to the specified ResultListener. If a result needs to be generated, the ResultProducer is used to obtain the result.
39 * <p/>
40 <!-- globalinfo-end -->
41 *
42 <!-- options-start -->
43 * Valid options are: <p/>
44 *
45 * <pre> -F &lt;field name&gt;
46 *  The name of the database field to cache over.
47 *  eg: "Fold" (default none)</pre>
48 *
49 * <pre> -W &lt;class name&gt;
50 *  The full class name of a ResultProducer.
51 *  eg: weka.experiment.CrossValidationResultProducer</pre>
52 *
53 * <pre>
54 * Options specific to result producer weka.experiment.CrossValidationResultProducer:
55 * </pre>
56 *
57 * <pre> -X &lt;number of folds&gt;
58 *  The number of folds to use for the cross-validation.
59 *  (default 10)</pre>
60 *
61 * <pre> -D
62 * Save raw split evaluator output.</pre>
63 *
64 * <pre> -O &lt;file/directory name/path&gt;
65 *  The filename where raw output will be stored.
66 *  If a directory name is specified then individual
67 *  outputs will be gzipped, otherwise all output will be
68 *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
69 *
70 * <pre> -W &lt;class name&gt;
71 *  The full class name of a SplitEvaluator.
72 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
73 *
74 * <pre>
75 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
76 * </pre>
77 *
78 * <pre> -W &lt;class name&gt;
79 *  The full class name of the classifier.
80 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
81 *
82 * <pre> -C &lt;index&gt;
83 *  The index of the class for which IR statistics
84 *  are to be output. (default 1)</pre>
85 *
86 * <pre> -I &lt;index&gt;
87 *  The index of an attribute to output in the
88 *  results. This attribute should identify an
89 *  instance in order to know which instances are
90 *  in the test set of a cross validation. if 0
91 *  no output (default 0).</pre>
92 *
93 * <pre> -P
94 *  Add target and prediction columns to the result
95 *  for each fold.</pre>
96 *
97 * <pre>
98 * Options specific to classifier weka.classifiers.rules.ZeroR:
99 * </pre>
100 *
101 * <pre> -D
102 *  If set, classifier is run in debug mode and
103 *  may output additional info to the console</pre>
104 *
105 <!-- options-end -->
106 *
107 * @author Len Trigg (trigg@cs.waikato.ac.nz)
108 * @version $Revision: 1.18 $
109 */
110public class DatabaseResultProducer 
111  extends DatabaseResultListener
112  implements ResultProducer, OptionHandler, AdditionalMeasureProducer {
113
114  /** for serialization */
115  static final long serialVersionUID = -5620660780203158666L;
116 
117  /** The dataset of interest */
118  protected Instances m_Instances;
119
120  /** The ResultListener to send results to */
121  protected ResultListener m_ResultListener = new CSVResultListener();
122
123  /** The ResultProducer used to generate results */
124  protected ResultProducer m_ResultProducer
125    = new CrossValidationResultProducer();
126
127  /** The names of any additional measures to look for in SplitEvaluators */
128  protected String [] m_AdditionalMeasures = null;
129
130  /**
131   * Returns a string describing this result producer
132   * @return a description of the result producer suitable for
133   * displaying in the explorer/experimenter gui
134   */
135  public String globalInfo() {
136    return "Examines a database and extracts out "
137      +"the results produced by the specified ResultProducer "
138      +"and submits them to the specified ResultListener. If a result needs "
139      +"to be generated, the ResultProducer is used to obtain the result.";
140  }
141
142  /**
143   * Creates the DatabaseResultProducer, letting the parent constructor do
144   * it's thing.
145   *
146   * @throws Exception if an error occurs
147   */
148  public DatabaseResultProducer() throws Exception {
149
150    super();
151  }
152 
153  /**
154   * Gets the keys for a specified run number. Different run
155   * numbers correspond to different randomizations of the data. Keys
156   * produced should be sent to the current ResultListener
157   *
158   * @param run the run number to get keys for.
159   * @throws Exception if a problem occurs while getting the keys
160   */
161  public void doRunKeys(int run) throws Exception {
162
163    if (m_ResultProducer == null) {
164      throw new Exception("No ResultProducer set");
165    }
166    if (m_ResultListener == null) {
167      throw new Exception("No ResultListener set");
168    }
169    if (m_Instances == null) {
170      throw new Exception("No Instances set");
171    }
172
173    // Tell the resultproducer to send results to us
174    m_ResultProducer.setResultListener(this);
175    m_ResultProducer.setInstances(m_Instances);
176    m_ResultProducer.doRunKeys(run);
177  }
178
179  /**
180   * Gets the results for a specified run number. Different run
181   * numbers correspond to different randomizations of the data. Results
182   * produced should be sent to the current ResultListener
183   *
184   * @param run the run number to get results for.
185   * @throws Exception if a problem occurs while getting the results
186   */
187  public void doRun(int run) throws Exception {
188
189    if (m_ResultProducer == null) {
190      throw new Exception("No ResultProducer set");
191    }
192    if (m_ResultListener == null) {
193      throw new Exception("No ResultListener set");
194    }
195    if (m_Instances == null) {
196      throw new Exception("No Instances set");
197    }
198
199    // Tell the resultproducer to send results to us
200    m_ResultProducer.setResultListener(this);
201    m_ResultProducer.setInstances(m_Instances);
202    m_ResultProducer.doRun(run);
203
204  }
205 
206  /**
207   * Prepare for the results to be received.
208   *
209   * @param rp the ResultProducer that will generate the results
210   * @throws Exception if an error occurs during preprocessing.
211   */
212  public void preProcess(ResultProducer rp) throws Exception {
213
214    super.preProcess(rp);
215    if (m_ResultListener == null) {
216      throw new Exception("No ResultListener set");
217    }
218    m_ResultListener.preProcess(this);
219  }
220
221  /**
222   * When this method is called, it indicates that no more results
223   * will be sent that need to be grouped together in any way.
224   *
225   * @param rp the ResultProducer that generated the results
226   * @throws Exception if an error occurs
227   */
228  public void postProcess(ResultProducer rp) throws Exception {
229
230    super.postProcess(rp);
231    m_ResultListener.postProcess(this);
232  }
233 
234  /**
235   * Prepare to generate results. The ResultProducer should call
236   * preProcess(this) on the ResultListener it is to send results to.
237   *
238   * @throws Exception if an error occurs during preprocessing.
239   */
240  public void preProcess() throws Exception {
241   
242    if (m_ResultProducer == null) {
243      throw new Exception("No ResultProducer set");
244    }
245    m_ResultProducer.setResultListener(this);
246    m_ResultProducer.preProcess();
247  }
248 
249  /**
250   * When this method is called, it indicates that no more requests to
251   * generate results for the current experiment will be sent. The
252   * ResultProducer should call preProcess(this) on the
253   * ResultListener it is to send results to.
254   *
255   * @throws Exception if an error occurs
256   */
257  public void postProcess() throws Exception {
258
259    m_ResultProducer.postProcess();
260  }
261   
262  /**
263   * Accepts results from a ResultProducer.
264   *
265   * @param rp the ResultProducer that generated the results
266   * @param key an array of Objects (Strings or Doubles) that uniquely
267   * identify a result for a given ResultProducer with given compatibilityState
268   * @param result the results stored in an array. The objects stored in
269   * the array may be Strings, Doubles, or null (for the missing value).
270   * @throws Exception if the result could not be accepted.
271   */
272  public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
273    throws Exception {
274
275    if (m_ResultProducer != rp) {
276      throw new Error("Unrecognized ResultProducer sending results!!");
277    }
278    //    System.err.println("DBRP::acceptResult");
279
280    // Is the result needed by the listener?
281    boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
282                                                                     key);
283    // Is the result already in the database?
284    boolean isRequiredByDatabase = super.isResultRequired(rp, key);
285
286    // Insert it into the database here
287    if (isRequiredByDatabase) {
288      // We could alternatively throw an exception if we only want values
289      // that are already in the database
290      if (result != null) {
291
292        // null result could occur from a chain of doRunKeys calls
293        super.acceptResult(rp, key, result);
294      }
295    }
296
297    // Pass it on
298    if (isRequiredByListener) {
299      m_ResultListener.acceptResult(this, key, result);
300    }
301  }
302
303  /**
304   * Determines whether the results for a specified key must be
305   * generated.
306   *
307   * @param rp the ResultProducer wanting to generate the results
308   * @param key an array of Objects (Strings or Doubles) that uniquely
309   * identify a result for a given ResultProducer with given compatibilityState
310   * @return true if the result should be generated
311   * @throws Exception if it could not be determined if the result
312   * is needed.
313   */
314  public boolean isResultRequired(ResultProducer rp, Object [] key) 
315    throws Exception {
316
317    if (m_ResultProducer != rp) {
318      throw new Error("Unrecognized ResultProducer sending results!!");
319    }
320    //    System.err.println("DBRP::isResultRequired");
321
322    // Is the result needed by the listener?
323    boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
324                                                                     key);
325    // Is the result already in the database?
326    boolean isRequiredByDatabase = super.isResultRequired(rp, key);
327
328    if (!isRequiredByDatabase && isRequiredByListener) {
329      // Pass the result through to the listener
330      Object [] result = getResultFromTable(m_ResultsTableName,
331                                            rp, key);
332      System.err.println("Got result from database: "
333                         + DatabaseUtils.arrayToString(result));
334      m_ResultListener.acceptResult(this, key, result);
335      return false;
336    }
337
338    return (isRequiredByListener || isRequiredByDatabase);
339  }
340
341  /**
342   * Gets the names of each of the columns produced for a single run.
343   *
344   * @return an array containing the name of each column
345   * @throws Exception if something goes wrong.
346   */
347  public String [] getKeyNames() throws Exception {
348
349    return m_ResultProducer.getKeyNames();
350  }
351
352  /**
353   * Gets the data types of each of the columns produced for a single run.
354   * This method should really be static.
355   *
356   * @return an array containing objects of the type of each column. The
357   * objects should be Strings, or Doubles.
358   * @throws Exception if something goes wrong.
359   */
360  public Object [] getKeyTypes() throws Exception {
361
362    return m_ResultProducer.getKeyTypes();
363  }
364
365  /**
366   * Gets the names of each of the columns produced for a single run.
367   * A new result field is added for the number of results used to
368   * produce each average.
369   * If only averages are being produced the names are not altered, if
370   * standard deviations are produced then "Dev_" and "Avg_" are prepended
371   * to each result deviation and average field respectively.
372   *
373   * @return an array containing the name of each column
374   * @throws Exception if something goes wrong.
375   */
376  public String [] getResultNames() throws Exception {
377
378    return m_ResultProducer.getResultNames();
379  }
380
381  /**
382   * Gets the data types of each of the columns produced for a single run.
383   *
384   * @return an array containing objects of the type of each column. The
385   * objects should be Strings, or Doubles.
386   * @throws Exception if something goes wrong.
387   */
388  public Object [] getResultTypes() throws Exception {
389
390    return m_ResultProducer.getResultTypes();
391  }
392
393  /**
394   * Gets a description of the internal settings of the result
395   * producer, sufficient for distinguishing a ResultProducer
396   * instance from another with different settings (ignoring
397   * those settings set through this interface). For example,
398   * a cross-validation ResultProducer may have a setting for the
399   * number of folds. For a given state, the results produced should
400   * be compatible. Typically if a ResultProducer is an OptionHandler,
401   * this string will represent the command line arguments required
402   * to set the ResultProducer to that state.
403   *
404   * @return the description of the ResultProducer state, or null
405   * if no state is defined
406   */
407  public String getCompatibilityState() {
408
409    String result = "";
410    if (m_ResultProducer == null) {
411      result += "<null ResultProducer>";
412    } else {
413      result += "-W " + m_ResultProducer.getClass().getName();
414    }
415    result  += " -- " + m_ResultProducer.getCompatibilityState();
416    return result.trim();
417  }
418
419
420  /**
421   * Returns an enumeration describing the available options..
422   *
423   * @return an enumeration of all the available options.
424   */
425  public Enumeration listOptions() {
426
427    Vector newVector = new Vector(2);
428
429    newVector.addElement(new Option(
430             "\tThe name of the database field to cache over.\n"
431              +"\teg: \"Fold\" (default none)", 
432             "F", 1, 
433             "-F <field name>"));
434    newVector.addElement(new Option(
435             "\tThe full class name of a ResultProducer.\n"
436              +"\teg: weka.experiment.CrossValidationResultProducer", 
437             "W", 1, 
438             "-W <class name>"));
439
440    if ((m_ResultProducer != null) &&
441        (m_ResultProducer instanceof OptionHandler)) {
442      newVector.addElement(new Option(
443             "",
444             "", 0, "\nOptions specific to result producer "
445             + m_ResultProducer.getClass().getName() + ":"));
446      Enumeration enu = ((OptionHandler)m_ResultProducer).listOptions();
447      while (enu.hasMoreElements()) {
448        newVector.addElement(enu.nextElement());
449      }
450    }
451    return newVector.elements();
452  }
453
454  /**
455   * Parses a given list of options. <p/>
456   *
457   <!-- options-start -->
458   * Valid options are: <p/>
459   *
460   * <pre> -F &lt;field name&gt;
461   *  The name of the database field to cache over.
462   *  eg: "Fold" (default none)</pre>
463   *
464   * <pre> -W &lt;class name&gt;
465   *  The full class name of a ResultProducer.
466   *  eg: weka.experiment.CrossValidationResultProducer</pre>
467   *
468   * <pre>
469   * Options specific to result producer weka.experiment.CrossValidationResultProducer:
470   * </pre>
471   *
472   * <pre> -X &lt;number of folds&gt;
473   *  The number of folds to use for the cross-validation.
474   *  (default 10)</pre>
475   *
476   * <pre> -D
477   * Save raw split evaluator output.</pre>
478   *
479   * <pre> -O &lt;file/directory name/path&gt;
480   *  The filename where raw output will be stored.
481   *  If a directory name is specified then then individual
482   *  outputs will be gzipped, otherwise all output will be
483   *  zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
484   *
485   * <pre> -W &lt;class name&gt;
486   *  The full class name of a SplitEvaluator.
487   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
488   *
489   * <pre>
490   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
491   * </pre>
492   *
493   * <pre> -W &lt;class name&gt;
494   *  The full class name of the classifier.
495   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
496   *
497   * <pre> -C &lt;index&gt;
498   *  The index of the class for which IR statistics
499   *  are to be output. (default 1)</pre>
500   *
501   * <pre> -I &lt;index&gt;
502   *  The index of an attribute to output in the
503   *  results. This attribute should identify an
504   *  instance in order to know which instances are
505   *  in the test set of a cross validation. if 0
506   *  no output (default 0).</pre>
507   *
508   * <pre> -P
509   *  Add target and prediction columns to the result
510   *  for each fold.</pre>
511   *
512   * <pre>
513   * Options specific to classifier weka.classifiers.rules.ZeroR:
514   * </pre>
515   *
516   * <pre> -D
517   *  If set, classifier is run in debug mode and
518   *  may output additional info to the console</pre>
519   *
520   <!-- options-end -->
521   *
522   * All option after -- will be passed to the result producer.
523   *
524   * @param options the list of options as an array of strings
525   * @throws Exception if an option is not supported
526   */
527  public void setOptions(String[] options) throws Exception {
528   
529    setCacheKeyName(Utils.getOption('F', options));
530   
531    String rpName = Utils.getOption('W', options);
532    if (rpName.length() == 0) {
533      throw new Exception("A ResultProducer must be specified with"
534                          + " the -W option.");
535    }
536    // Do it first without options, so if an exception is thrown during
537    // the option setting, listOptions will contain options for the actual
538    // RP.
539    setResultProducer((ResultProducer)Utils.forName(
540                      ResultProducer.class,
541                      rpName,
542                      null));
543    if (getResultProducer() instanceof OptionHandler) {
544      ((OptionHandler) getResultProducer())
545        .setOptions(Utils.partitionOptions(options));
546    }
547  }
548
549  /**
550   * Gets the current settings of the result producer.
551   *
552   * @return an array of strings suitable for passing to setOptions
553   */
554  public String [] getOptions() {
555
556    String [] seOptions = new String [0];
557    if ((m_ResultProducer != null) && 
558        (m_ResultProducer instanceof OptionHandler)) {
559      seOptions = ((OptionHandler)m_ResultProducer).getOptions();
560    }
561   
562    String [] options = new String [seOptions.length + 8];
563    int current = 0;
564
565    if (!getCacheKeyName().equals("")) {
566      options[current++] = "-F";
567      options[current++] = getCacheKeyName();
568    }
569    if (getResultProducer() != null) {
570      options[current++] = "-W";
571      options[current++] = getResultProducer().getClass().getName();
572    }
573    options[current++] = "--";
574
575    System.arraycopy(seOptions, 0, options, current, 
576                     seOptions.length);
577    current += seOptions.length;
578    while (current < options.length) {
579      options[current++] = "";
580    }
581    return options;
582  }
583
584  /**
585   * Set a list of method names for additional measures to look for
586   * in SplitEvaluators. This could contain many measures (of which only a
587   * subset may be produceable by the current resultProducer) if an experiment
588   * is the type that iterates over a set of properties.
589   * @param additionalMeasures an array of measure names, null if none
590   */
591  public void setAdditionalMeasures(String [] additionalMeasures) {
592    m_AdditionalMeasures = additionalMeasures;
593
594    if (m_ResultProducer != null) {
595      System.err.println("DatabaseResultProducer: setting additional "
596                         +"measures for "
597                         +"ResultProducer");
598      m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
599    }
600  }
601
602  /**
603   * Returns an enumeration of any additional measure names that might be
604   * in the result producer
605   * @return an enumeration of the measure names
606   */
607  public Enumeration enumerateMeasures() {
608    Vector newVector = new Vector();
609    if (m_ResultProducer instanceof AdditionalMeasureProducer) {
610      Enumeration en = ((AdditionalMeasureProducer)m_ResultProducer).
611        enumerateMeasures();
612      while (en.hasMoreElements()) {
613        String mname = (String)en.nextElement();
614        newVector.addElement(mname);
615      }
616    }
617    return newVector.elements();
618  }
619
620  /**
621   * Returns the value of the named measure
622   * @param additionalMeasureName the name of the measure to query for its value
623   * @return the value of the named measure
624   * @throws IllegalArgumentException if the named measure is not supported
625   */
626  public double getMeasure(String additionalMeasureName) {
627    if (m_ResultProducer instanceof AdditionalMeasureProducer) {
628      return ((AdditionalMeasureProducer)m_ResultProducer).
629        getMeasure(additionalMeasureName);
630    } else {
631      throw new IllegalArgumentException("DatabaseResultProducer: "
632                          +"Can't return value for : "+additionalMeasureName
633                          +". "+m_ResultProducer.getClass().getName()+" "
634                          +"is not an AdditionalMeasureProducer");
635    }
636  }
637 
638 
639  /**
640   * Sets the dataset that results will be obtained for.
641   *
642   * @param instances a value of type 'Instances'.
643   */
644  public void setInstances(Instances instances) {
645   
646    m_Instances = instances;
647  }
648 
649  /**
650   * Sets the object to send results of each run to.
651   *
652   * @param listener a value of type 'ResultListener'
653   */
654  public void setResultListener(ResultListener listener) {
655
656    m_ResultListener = listener;
657  }
658
659  /**
660   * Returns the tip text for this property
661   * @return tip text for this property suitable for
662   * displaying in the explorer/experimenter gui
663   */
664  public String resultProducerTipText() {
665    return "Set the result producer to use. If some results are not found "
666      +"in the source database then this result producer is used to generate "
667      +"them.";
668  }
669 
670  /**
671   * Get the ResultProducer.
672   *
673   * @return the ResultProducer.
674   */
675  public ResultProducer getResultProducer() {
676   
677    return m_ResultProducer;
678  }
679 
680  /**
681   * Set the ResultProducer.
682   *
683   * @param newResultProducer new ResultProducer to use.
684   */
685  public void setResultProducer(ResultProducer newResultProducer) {
686   
687    m_ResultProducer = newResultProducer;
688  }
689
690  /**
691   * Gets a text descrption of the result producer.
692   *
693   * @return a text description of the result producer.
694   */
695  public String toString() {
696
697    String result = "DatabaseResultProducer: ";
698    result += getCompatibilityState();
699    if (m_Instances == null) {
700      result += ": <null Instances>";
701    } else {
702      result += ": " + Utils.backQuoteChars(m_Instances.relationName());
703    }
704    return result;
705  }
706 
707  /**
708   * Returns the revision string.
709   *
710   * @return            the revision
711   */
712  public String getRevision() {
713    return RevisionUtils.extract("$Revision: 1.18 $");
714  }
715} // DatabaseResultProducer
Note: See TracBrowser for help on using the repository browser.