source: src/main/java/weka/classifiers/CheckSource.java @ 10

Last change on this file since 10 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 11.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * CheckSource.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.classifiers;
23
24import weka.core.Instances;
25import weka.core.Option;
26import weka.core.OptionHandler;
27import weka.core.RevisionHandler;
28import weka.core.RevisionUtils;
29import weka.core.Utils;
30import weka.core.converters.ConverterUtils.DataSource;
31
32import java.io.File;
33import java.util.Enumeration;
34import java.util.Vector;
35
36/**
37 * A simple class for checking the source generated from Classifiers
38 * implementing the <code>weka.classifiers.Sourcable</code> interface.
39 * It takes a classifier, the classname of the generated source
40 * and the dataset the source was generated with as parameters and tests
41 * the output of the built classifier against the output of the generated
42 * source. Use option '-h' to display all available commandline options.
43 *
44 <!-- options-start -->
45 * Valid options are: <p/>
46 *
47 * <pre> -W &lt;classname and options&gt;
48 *  The classifier (incl. options) that was used to generate
49 *  the source code.</pre>
50 *
51 * <pre> -S &lt;classname&gt;
52 *  The classname of the generated source code.</pre>
53 *
54 * <pre> -t &lt;file&gt;
55 *  The training set with which the source code was generated.</pre>
56 *
57 * <pre> -c &lt;index&gt;
58 *  The class index of the training set. 'first' and 'last' are
59 *  valid indices.
60 *  (default: last)</pre>
61 *
62 <!-- options-end -->
63 *
64 * Options after -- are passed to the designated classifier (specified with -W).
65 *
66 * @author  fracpete (fracpete at waikato dot ac dot nz)
67 * @version $Revision: 6041 $
68 * @see     weka.classifiers.Sourcable
69 */
70public class CheckSource
71  implements OptionHandler, RevisionHandler {
72
73  /** the classifier used for generating the source code */
74  protected Classifier m_Classifier = null;
75
76  /** the generated source code */
77  protected Classifier m_SourceCode = null;
78
79  /** the dataset to use for testing */
80  protected File m_Dataset = null;
81
82  /** the class index */
83  protected int m_ClassIndex = -1;
84
85  /**
86   * Returns an enumeration describing the available options.
87   *
88   * @return an enumeration of all the available options.
89   */
90  public Enumeration listOptions() {
91    Vector result = new Vector();
92
93    result.addElement(new Option(
94        "\tThe classifier (incl. options) that was used to generate\n"
95        + "\tthe source code.",
96        "W", 1, "-W <classname and options>"));
97
98    result.addElement(new Option(
99        "\tThe classname of the generated source code.",
100        "S", 1, "-S <classname>"));
101
102    result.addElement(new Option(
103        "\tThe training set with which the source code was generated.",
104        "t", 1, "-t <file>"));
105
106    result.addElement(new Option(
107        "\tThe class index of the training set. 'first' and 'last' are\n"
108        + "\tvalid indices.\n"
109        + "\t(default: last)",
110        "c", 1, "-c <index>"));
111
112    return result.elements();
113  }
114
115  /**
116   * Parses a given list of options. <p/>
117   *
118   <!-- options-start -->
119   * Valid options are: <p/>
120   *
121   * <pre> -W &lt;classname and options&gt;
122   *  The classifier (incl. options) that was used to generate
123   *  the source code.</pre>
124   *
125   * <pre> -S &lt;classname&gt;
126   *  The classname of the generated source code.</pre>
127   *
128   * <pre> -t &lt;file&gt;
129   *  The training set with which the source code was generated.</pre>
130   *
131   * <pre> -c &lt;index&gt;
132   *  The class index of the training set. 'first' and 'last' are
133   *  valid indices.
134   *  (default: last)</pre>
135   *
136   <!-- options-end -->
137   *
138   * Options after -- are passed to the designated classifier (specified with
139   * -W).
140   *
141   * @param options the list of options as an array of strings
142   * @throws Exception if an option is not supported
143   */
144  public void setOptions(String[] options) throws Exception {
145    String      tmpStr;
146    String[]    spec;
147    String      classname;
148
149    tmpStr = Utils.getOption('W', options);
150    if (tmpStr.length() > 0) {
151      spec = Utils.splitOptions(tmpStr);
152      if (spec.length == 0)
153        throw new IllegalArgumentException("Invalid classifier specification string");
154      classname = spec[0];
155      spec[0]   = "";
156      setClassifier((Classifier) Utils.forName(Classifier.class, classname, spec));
157    }
158    else {
159      throw new Exception("No classifier (classname + options) provided!");
160    }
161
162    tmpStr = Utils.getOption('S', options);
163    if (tmpStr.length() > 0) {
164      spec = Utils.splitOptions(tmpStr);
165      if (spec.length != 1)
166        throw new IllegalArgumentException("Invalid source code specification string");
167      classname = spec[0];
168      spec[0]   = "";
169      setSourceCode((Classifier) Utils.forName(Classifier.class, classname, spec));
170    }
171    else {
172      throw new Exception("No source code (classname) provided!");
173    }
174
175    tmpStr = Utils.getOption('t', options);
176    if (tmpStr.length() != 0)
177      setDataset(new File(tmpStr));
178    else
179      throw new Exception("No dataset provided!");
180
181    tmpStr = Utils.getOption('c', options);
182    if (tmpStr.length() != 0) {
183      if (tmpStr.equals("first"))
184        setClassIndex(0);
185      else if (tmpStr.equals("last"))
186        setClassIndex(-1);
187      else
188        setClassIndex(Integer.parseInt(tmpStr) - 1);
189    }
190    else {
191      setClassIndex(-1);
192    }
193  }
194
195  /**
196   * Gets the current settings of the Classifier.
197   *
198   * @return an array of strings suitable for passing to setOptions
199   */
200  public String[] getOptions() {
201    Vector<String>      result;
202
203    result  = new Vector<String>();
204
205    if (getClassifier() != null) {
206      result.add("-W");
207      result.add(getClassifier().getClass().getName() + " "
208          + Utils.joinOptions(((OptionHandler) getClassifier()).getOptions()));
209    }
210
211    if (getSourceCode() != null) {
212      result.add("-S");
213      result.add(getSourceCode().getClass().getName());
214    }
215
216    if (getDataset() != null) {
217      result.add("-t");
218      result.add(m_Dataset.getAbsolutePath());
219    }
220
221    result.add("-c");
222    if (getClassIndex() == -1)
223      result.add("last");
224    else if (getClassIndex() == 0)
225      result.add("first");
226    else
227      result.add("" + (getClassIndex() + 1));
228
229    return result.toArray(new String[result.size()]);
230  }
231
232  /**
233   * Sets the classifier to use for the comparison.
234   *
235   * @param value       the classifier to use
236   */
237  public void setClassifier(Classifier value) {
238    m_Classifier = value;
239  }
240
241  /**
242   * Gets the classifier being used for the tests, can be null.
243   *
244   * @return            the currently set classifier
245   */
246  public Classifier getClassifier() {
247    return m_Classifier;
248  }
249
250  /**
251   * Sets the class to test.
252   *
253   * @param value       the class to test
254   */
255  public void setSourceCode(Classifier value) {
256    m_SourceCode = value;
257  }
258
259  /**
260   * Gets the class to test.
261   *
262   * @return            the currently set class, can be null.
263   */
264  public Classifier getSourceCode() {
265    return m_SourceCode;
266  }
267
268  /**
269   * Sets the dataset to use for testing.
270   *
271   * @param value       the dataset to use.
272   */
273  public void setDataset(File value) {
274    if (!value.exists())
275      throw new IllegalArgumentException(
276          "Dataset '" + value.getAbsolutePath() + "' does not exist!");
277    else
278      m_Dataset = value;
279  }
280
281  /**
282   * Gets the dataset to use for testing, can be null.
283   *
284   * @return            the dataset to use.
285   */
286  public File getDataset() {
287    return m_Dataset;
288  }
289
290  /**
291   * Sets the class index of the dataset.
292   *
293   * @param value       the class index of the dataset.
294   */
295  public void setClassIndex(int value) {
296    m_ClassIndex = value;
297  }
298
299  /**
300   * Gets the class index of the dataset.
301   *
302   * @return            the current class index.
303   */
304  public int getClassIndex() {
305    return m_ClassIndex;
306  }
307
308  /**
309   * performs the comparison test
310   *
311   * @return            true if tests were successful
312   * @throws Exception  if tests fail
313   */
314  public boolean execute() throws Exception {
315    boolean     result;
316    Classifier  cls;
317    Classifier  code;
318    int         i;
319    Instances   data;
320    DataSource  source;
321    boolean     numeric;
322    boolean     different;
323    double      predClassifier;
324    double      predSource;
325
326    result = true;
327
328    // a few checks
329    if (getClassifier() == null)
330      throw new Exception("No classifier set!");
331    if (getSourceCode() == null)
332      throw new Exception("No source code set!");
333    if (getDataset() == null)
334      throw new Exception("No dataset set!");
335    if (!getDataset().exists())
336      throw new Exception(
337          "Dataset '" + getDataset().getAbsolutePath() + "' does not exist!");
338
339    // load data
340    source = new DataSource(getDataset().getAbsolutePath());
341    data   = source.getDataSet();
342    if (getClassIndex() == -1)
343      data.setClassIndex(data.numAttributes() - 1);
344    else
345      data.setClassIndex(getClassIndex());
346    numeric = data.classAttribute().isNumeric();
347
348    // build classifier
349    cls = AbstractClassifier.makeCopy(getClassifier());
350    cls.buildClassifier(data);
351
352    code = getSourceCode();
353
354    // compare predictions
355    for (i = 0; i < data.numInstances(); i++) {
356      // perform predictions
357      predClassifier = cls.classifyInstance(data.instance(i));
358      predSource     = code.classifyInstance(data.instance(i));
359
360      // compare both results
361      if (Double.isNaN(predClassifier) && Double.isNaN(predSource)) {
362        different = false;
363      }
364      else {
365        if (numeric)
366          different = !Utils.eq(predClassifier, predSource);
367        else
368          different = ((int) predClassifier != (int) predSource);
369      }
370
371      if (different) {
372        result = false;
373        if (numeric)
374          System.out.println(
375              (i+1) + ". instance (Classifier/Source code): "
376              + predClassifier + " != " + predSource);
377        else
378          System.out.println(
379              (i+1) + ". instance (Classifier/Source code): "
380              + data.classAttribute().value((int) predClassifier)
381              + " != " + data.classAttribute().value((int) predSource));
382      }
383    }
384
385    return result;
386  }
387
388  /**
389   * Returns the revision string.
390   *
391   * @return            the revision
392   */
393  public String getRevision() {
394    return RevisionUtils.extract("$Revision: 6041 $");
395  }
396
397  /**
398   * Executes the tests, use "-h" to list the commandline options.
399   *
400   * @param args        the commandline parameters
401   * @throws Exception  if something goes wrong
402   */
403  public static void main(String[] args) throws Exception{
404    CheckSource         check;
405    StringBuffer        text;
406    Enumeration         enm;
407
408    check = new CheckSource();
409    if (Utils.getFlag('h', args)) {
410      text = new StringBuffer();
411      text.append("\nHelp requested:\n\n");
412      enm = check.listOptions();
413      while (enm.hasMoreElements()) {
414        Option option = (Option) enm.nextElement();
415        text.append(option.synopsis() + "\n");
416        text.append(option.description() + "\n");
417      }
418      System.out.println("\n" + text + "\n");
419    }
420    else {
421      check.setOptions(args);
422      if (check.execute())
423        System.out.println("Tests OK!");
424      else
425        System.out.println("Tests failed!");
426    }
427  }
428}
Note: See TracBrowser for help on using the repository browser.