source: src/main/java/weka/classifiers/evaluation/output/prediction/AbstractOutput.java @ 15

Last change on this file since 15 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 18.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * AbstractOutput.java
19 * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.classifiers.evaluation.output.prediction;
23
24import weka.classifiers.Classifier;
25import weka.core.Instance;
26import weka.core.Instances;
27import weka.core.Option;
28import weka.core.OptionHandler;
29import weka.core.Range;
30import weka.core.Utils;
31import weka.core.WekaException;
32import weka.core.converters.ConverterUtils.DataSource;
33
34import java.io.BufferedWriter;
35import java.io.File;
36import java.io.FileWriter;
37import java.io.Serializable;
38import java.util.Enumeration;
39import java.util.Vector;
40
41/**
42 * A superclass for outputting the classifications of a classifier.
43 * <p/>
44 * Basic use with a classifier and a test set:
45 * <pre>
46 * Classifier classifier = ... // trained classifier
47 * Instances testset = ... // the test set to output the predictions for
48 * StringBuffer buffer = ... // the string buffer to add the output to
49 * AbstractOutput output = new FunkyOutput();
50 * output.setHeader(...);
51 * output.printClassifications(classifier, testset);
52 * </pre>
53 *
54 * Basic use with a classifier and a data source:
55 * <pre>
56 * Classifier classifier = ... // trained classifier
57 * DataSource testset = ... // the data source to obtain the test set from to output the predictions for
58 * StringBuffer buffer = ... // the string buffer to add the output to
59 * AbstractOutput output = new FunkyOutput();
60 * output.setHeader(...);
61 * output.printClassifications(classifier, testset);
62 * </pre>
63 *
64 * In order to make the output generation easily integrate into GUI components,
65 * one can output the header, classifications and footer separately:
66 * <pre>
67 * Classifier classifier = ... // trained classifier
68 * Instances testset = ... // the test set to output the predictions for
69 * StringBuffer buffer = ... // the string buffer to add the output to
70 * AbstractOutput output = new FunkyOutput();
71 * output.setHeader(...);
72 * // print the header
73 * output.printHeader();
74 * // print the classifications one-by-one
75 * for (int i = 0; i &lt; testset.numInstances(); i++) {
76 *   output.printClassification(classifier, testset.instance(i), i);
77 *   // output progress information
78 *   if ((i+1) % 100 == 0)
79 *     System.out.println((i+1) + "/" + testset.numInstances());
80 * }
81 * // print the footer
82 * output.printFooter();
83 * </pre>
84 *
85 * @author  fracpete (fracpete at waikato dot ac dot nz)
86 * @version $Revision: 5466 $
87 */
88public abstract class AbstractOutput
89  implements Serializable, OptionHandler {
90
91  /** for serialization. */
92  private static final long serialVersionUID = 752696986017306241L;
93
94  /** the header of the dataset. */
95  protected Instances m_Header;
96
97  /** the buffer to write to. */
98  protected StringBuffer m_Buffer;
99 
100  /** the file buffer to write to. */
101  protected StringBuffer m_FileBuffer;
102 
103  /** whether to output the class distribution. */
104  protected boolean m_OutputDistribution;
105 
106  /** the range of attributes to output. */
107  protected Range m_Attributes;
108 
109  /** the number of decimals after the decimal point. */
110  protected int m_NumDecimals;
111
112  /** the file to store the output in. */
113  protected File m_OutputFile;
114 
115  /** whether to suppress the regular output and only store in file. */
116  protected boolean m_SuppressOutput;
117 
118  /**
119   * Initializes the output class.
120   */
121  public AbstractOutput() {
122    m_Header             = null;
123    m_OutputDistribution = false;
124    m_Attributes         = null;
125    m_Buffer             = null;
126    m_NumDecimals        = 3;
127    m_OutputFile         = new File(".");
128    m_FileBuffer         = new StringBuffer();
129    m_SuppressOutput     = false;
130  }
131 
132  /**
133   * Returns a string describing the output generator.
134   *
135   * @return            a description suitable for
136   *                    displaying in the GUI
137   */
138  public abstract String globalInfo();
139 
140  /**
141   * Returns a short display text, to be used in comboboxes.
142   *
143   * @return            a short display text
144   */
145  public abstract String getDisplay();
146
147  /**
148   * Returns an enumeration of all the available options..
149   *
150   * @return            an enumeration of all available options.
151   */
152  public Enumeration listOptions() {
153    Vector      result;
154   
155    result = new Vector();
156   
157    result.addElement(new Option(
158        "\tThe range of attributes to print in addition to the classification.\n"
159        + "\t(default: none)",
160        "p", 1, "-p <range>"));
161   
162    result.addElement(new Option(
163        "\tWhether to turn on the output of the class distribution.\n"
164        + "\tOnly for nominal class attributes.\n"
165        + "\t(default: off)",
166        "distribution", 0, "-distribution"));
167   
168    result.addElement(new Option(
169        "\tThe number of digits after the decimal point.\n"
170        + "\t(default: " + getDefaultNumDecimals() + ")",
171        "decimals", 1, "-decimals <num>"));
172   
173    result.addElement(new Option(
174        "\tThe file to store the output in, instead of outputting it on stdout.\n"
175        + "\tGets ignored if the supplied path is a directory.\n"
176        + "\t(default: .)",
177        "file", 1, "-file <path>"));
178   
179    result.addElement(new Option(
180        "\tIn case the data gets stored in a file, then this flag can be used\n"
181        + "\tto suppress the regular output.\n"
182        + "\t(default: not suppressed)",
183        "suppress", 0, "-suppress"));
184   
185    return result.elements();
186  }
187
188  /**
189   * Sets the OptionHandler's options using the given list. All options
190   * will be set (or reset) during this call (i.e. incremental setting
191   * of options is not possible).
192   *
193   * @param options     the list of options as an array of strings
194   * @throws Exception  if an option is not supported
195   */
196  public void setOptions(String[] options) throws Exception {
197    String      tmpStr;
198   
199    setAttributes(Utils.getOption("p", options));
200    setOutputDistribution(Utils.getFlag("distribution", options));
201   
202    tmpStr = Utils.getOption("decimals", options);
203    if (tmpStr.length() > 0)
204      setNumDecimals(Integer.parseInt(tmpStr));
205    else
206      setNumDecimals(getDefaultNumDecimals());
207   
208    tmpStr = Utils.getOption("file", options);
209    if (tmpStr.length() > 0)
210      setOutputFile(new File(tmpStr));
211    else
212      setOutputFile(new File("."));
213   
214    setSuppressOutput(Utils.getFlag("suppress", options));
215  }
216
217  /**
218   * Gets the current option settings for the OptionHandler.
219   *
220   * @return the list of current option settings as an array of strings
221   */
222  public String[] getOptions() {
223    Vector<String>      result;
224   
225    result = new Vector<String>();
226   
227    if (getAttributes().length() > 0) {
228      result.add("-p");
229      result.add(getAttributes());
230    }
231   
232    if (getOutputDistribution())
233      result.add("-distribution");
234
235    if (getNumDecimals() != getDefaultNumDecimals()) {
236      result.add("-decimals");
237      result.add("" + getNumDecimals());
238    }
239   
240    if (!getOutputFile().isDirectory()) {
241      result.add("-file");
242      result.add(getOutputFile().getAbsolutePath());
243      if (getSuppressOutput())
244        result.add("-suppress");
245    }
246   
247    return result.toArray(new String[result.size()]);
248  }
249 
250  /**
251   * Sets the header of the dataset.
252   *
253   * @param value       the header
254   */
255  public void setHeader(Instances value) {
256    m_Header = new Instances(value, 0);
257  }
258 
259  /**
260   * Returns the header of the dataset.
261   *
262   * @return            the header
263   */
264  public Instances getHeader() {
265    return m_Header;
266  }
267 
268  /**
269   * Sets the buffer to use.
270   *
271   * @param value       the buffer
272   */
273  public void setBuffer(StringBuffer value) {
274    m_Buffer = value;
275  }
276 
277  /**
278   * Returns the current buffer.
279   *
280   * @return            the buffer, can be null
281   */
282  public StringBuffer getBuffer() {
283    return m_Buffer;
284  }
285 
286  /**
287   * Sets the range of attributes to output.
288   *
289   * @param value       the range
290   */
291  public void setAttributes(String value) {
292    if (value.length() == 0)
293      m_Attributes = null;
294    else
295      m_Attributes = new Range(value);
296  }
297 
298  /**
299   * Returns the range of attributes to output.
300   *
301   * @return            the range
302   */
303  public String getAttributes() {
304    if (m_Attributes == null)
305      return "";
306    else
307      return m_Attributes.getRanges();
308  }
309
310  /**
311   * Returns the tip text for this property.
312   *
313   * @return            tip text for this property suitable for
314   *                    displaying in the GUI
315   */
316  public String attributesTipText() {
317    return "The indices of the attributes to print in addition.";
318  }
319 
320  /**
321   * Sets whether to output the class distribution or not.
322   *
323   * @param value       true if the class distribution is to be output as well
324   */
325  public void setOutputDistribution(boolean value) {
326    m_OutputDistribution = value;
327  }
328 
329  /**
330   * Returns whether to output the class distribution as well.
331   *
332   * @return            true if the class distribution is output as well
333   */
334  public boolean getOutputDistribution() {
335    return m_OutputDistribution;
336  }
337
338  /**
339   * Returns the tip text for this property.
340   *
341   * @return            tip text for this property suitable for
342   *                    displaying in the GUI
343   */
344  public String outputDistributionTipText() {
345    return "Whether to ouput the class distribution as well (only nominal class attributes).";
346  }
347 
348  /**
349   * Returns the default number of digits to output after the decimal point.
350   *
351   * @return            the default number of digits
352   */
353  public int getDefaultNumDecimals() {
354    return 3;
355  }
356 
357  /**
358   * Sets the number of digits to output after the decimal point.
359   *
360   * @param value       the number of digits
361   */
362  public void setNumDecimals(int value) {
363    if (value >= 0)
364      m_NumDecimals = value;
365    else
366      System.err.println(
367          "Number of decimals cannot be negative (provided: " + value + ")!");
368  }
369 
370  /**
371   * Returns the number of digits to output after the decimal point.
372   *
373   * @return            the number of digits
374   */
375  public int getNumDecimals() {
376    return m_NumDecimals;
377  }
378
379  /**
380   * Returns the tip text for this property.
381   *
382   * @return            tip text for this property suitable for
383   *                    displaying in the GUI
384   */
385  public String numDecimalsTipText() {
386    return "The number of digits to output after the decimal point.";
387  }
388 
389  /**
390   * Sets the output file to write to. A directory disables this feature.
391   *
392   * @param value       the file to write to or a directory
393   */
394  public void setOutputFile(File value) {
395    m_OutputFile = value;
396  }
397 
398  /**
399   * Returns the output file to write to. A directory if turned off.
400   *
401   * @return            the file to write to or a directory
402   */
403  public File getOutputFile() {
404    return m_OutputFile;
405  }
406
407  /**
408   * Returns the tip text for this property.
409   *
410   * @return            tip text for this property suitable for
411   *                    displaying in the GUI
412   */
413  public String outputFileTipText() {
414    return "The file to write the generated output to (disabled if path is a directory).";
415  }
416 
417  /**
418   * Sets whether to the regular output is suppressed in case the output is
419   * stored in a file.
420   *
421   * @param value       true if the regular output is to be suppressed
422   */
423  public void setSuppressOutput(boolean value) {
424    m_SuppressOutput = value;
425  }
426 
427  /**
428   * Returns whether to the regular output is suppressed in case the output
429   * is stored in a file.
430   *
431   * @return            true if the regular output is to be suppressed
432   */
433  public boolean getSuppressOutput() {
434    return m_SuppressOutput;
435  }
436
437  /**
438   * Returns the tip text for this property.
439   *
440   * @return            tip text for this property suitable for
441   *                    displaying in the GUI
442   */
443  public String suppressOutputTipText() {
444    return "Whether to suppress the regular output when storing the output in a file.";
445  }
446 
447  /**
448   * Performs basic checks.
449   *
450   * @return            null if everything is in order, otherwise the error message
451   */
452  protected String checkBasic() {
453    if (m_Buffer == null)
454      return "Buffer is null!";
455   
456    if (m_Header == null)
457      return "No dataset structure provided!";
458   
459    if (m_Attributes != null)
460      m_Attributes.setUpper(m_Header.numAttributes() - 1);
461   
462    return null;
463  }
464
465  /**
466   * Returns whether regular output is generated or not.
467   *
468   * @return            true if regular output is generated
469   */
470  public boolean generatesOutput() {
471    return    m_OutputFile.isDirectory() 
472           || (!m_OutputFile.isDirectory() && !m_SuppressOutput);
473  }
474 
475  /**
476   * If an output file was defined, then the string gets added to the file
477   * buffer, otherwise to the actual buffer.
478   *
479   * @param s           the string to append
480   * @see               #m_Buffer
481   * @see               #m_FileBuffer
482   */
483  protected void append(String s) {
484    if (generatesOutput())
485      m_Buffer.append(s);
486    if (!m_OutputFile.isDirectory())
487      m_FileBuffer.append(s);
488  }
489 
490  /**
491   * Performs checks whether everything is correctly setup for the header.
492   *
493   * @return            null if everything is in order, otherwise the error message
494   */
495  protected String checkHeader() {
496    return checkBasic();
497  }
498 
499  /**
500   * Performs the actual printing of the header.
501   */
502  protected abstract void doPrintHeader();
503 
504  /**
505   * Prints the header to the buffer.
506   */
507  public void printHeader() {
508    String      error;
509   
510    if ((error = checkHeader()) != null)
511      throw new IllegalStateException(error);
512   
513    doPrintHeader();
514  }
515 
516  /**
517   * Performs the actual printing of the classification.
518   *
519   * @param classifier  the classifier to use for printing the classification
520   * @param inst        the instance to print
521   * @param index       the index of the instance
522   * @throws Exception  if printing of classification fails
523   */
524  protected abstract void doPrintClassification(Classifier classifier, Instance inst, int index) throws Exception;
525 
526  /**
527   * Prints the classification to the buffer.
528   *
529   * @param classifier  the classifier to use for printing the classification
530   * @param inst        the instance to print
531   * @param index       the index of the instance
532   * @throws Exception  if check fails or error occurs during printing of classification
533   */
534  public void printClassification(Classifier classifier, Instance inst, int index) throws Exception {
535    String      error;
536   
537    if ((error = checkBasic()) != null)
538      throw new WekaException(error);
539   
540    doPrintClassification(classifier, inst, index);
541  }
542 
543  /**
544   * Prints the classifications to the buffer.
545   *
546   * @param classifier  the classifier to use for printing the classifications
547   * @param testset     the data source to obtain the test instances from
548   * @throws Exception  if check fails or error occurs during printing of classifications
549   */
550  public void printClassifications(Classifier classifier, DataSource testset) throws Exception {
551    int         i;
552    Instances   test;
553    Instance    inst;
554   
555    i = 0;
556    testset.reset();
557    test = testset.getStructure(m_Header.classIndex());
558    while (testset.hasMoreElements(test)) {
559      inst = testset.nextElement(test);
560      doPrintClassification(classifier, inst, i);
561      i++;
562    }
563  }
564 
565  /**
566   * Prints the classifications to the buffer.
567   *
568   * @param classifier  the classifier to use for printing the classifications
569   * @param testset     the test instances
570   * @throws Exception  if check fails or error occurs during printing of classifications
571   */
572  public void printClassifications(Classifier classifier, Instances testset) throws Exception {
573    int         i;
574
575    for (i = 0; i < testset.numInstances(); i++)
576      doPrintClassification(classifier, testset.instance(i), i);
577  }
578 
579  /**
580   * Performs the actual printing of the footer.
581   */
582  protected abstract void doPrintFooter();
583 
584  /**
585   * Prints the footer to the buffer. This will also store the generated
586   * output in a file if an output file was specified.
587   *
588   * @throws Exception  if check fails
589   */
590  public void printFooter() throws Exception {
591    String              error;
592    BufferedWriter      writer;
593   
594    if ((error = checkBasic()) != null)
595      throw new WekaException(error);
596   
597    doPrintFooter();
598   
599    // write output to file
600    if (!m_OutputFile.isDirectory()) {
601      try {
602        writer = new BufferedWriter(new FileWriter(m_OutputFile));
603        writer.write(m_FileBuffer.toString());
604        writer.newLine();
605        writer.flush();
606        writer.close();
607      }
608      catch (Exception e) {
609        e.printStackTrace();
610      }
611    }
612  }
613 
614  /**
615   * Prints the header, classifications and footer to the buffer.
616   *
617   * @param classifier  the classifier to use for printing the classifications
618   * @param testset     the data source to obtain the test instances from
619   * @throws Exception  if check fails or error occurs during printing of classifications
620   */
621  public void print(Classifier classifier, DataSource testset) throws Exception {
622    printHeader();
623    printClassifications(classifier, testset);
624    printFooter();
625  }
626 
627  /**
628   * Prints the header, classifications and footer to the buffer.
629   *
630   * @param classifier  the classifier to use for printing the classifications
631   * @param testset     the test instances
632   * @throws Exception  if check fails or error occurs during printing of classifications
633   */
634  public void print(Classifier classifier, Instances testset) throws Exception {
635    printHeader();
636    printClassifications(classifier, testset);
637    printFooter();
638  }
639 
640  /**
641   * Returns a fully configured object from the given commandline.
642   *
643   * @param cmdline     the commandline to turn into an object
644   * @return            the object or null in case of an error
645   */
646  public static AbstractOutput fromCommandline(String cmdline) {
647    AbstractOutput      result;
648    String[]                            options;
649    String                              classname;
650   
651    try {
652      options    = Utils.splitOptions(cmdline);
653      classname  = options[0];
654      options[0] = "";
655      result     = (AbstractOutput) Utils.forName(AbstractOutput.class, classname, options);
656    }
657    catch (Exception e) {
658      result = null;
659    }
660   
661    return result;
662  }
663}
Note: See TracBrowser for help on using the repository browser.