source: branches/MetisMQI/src/test/java/weka/core/tokenizers/AbstractTokenizerTest.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.3 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Copyright (C) 2007 University of Waikato
19 */
20
21package weka.core.tokenizers;
22
23import weka.core.CheckGOE;
24import weka.core.CheckOptionHandler;
25import weka.core.FastVector;
26import weka.core.OptionHandler;
27import weka.core.SerializationHelper;
28import weka.core.CheckScheme.PostProcessor;
29import weka.test.Regression;
30
31import junit.framework.TestCase;
32
33/**
34 * Abstract Test class for Tokenizers.
35 *
36 * @author <a href="mailto:len@reeltwo.com">Len Trigg</a>
37 * @author FracPete (fracpete at waikato dot ac dot nz)
38 * @version $Revision: 1.2 $
39 *
40 * @see PostProcessor
41 */
42public abstract class AbstractTokenizerTest 
43  extends TestCase {
44 
45  /** data for the regression tests */
46  protected String[] m_Data;
47 
48  /** The tokenizer to be tested */
49  protected Tokenizer m_Tokenizer;
50 
51  /** the results of the regression tests */
52  protected FastVector[] m_RegressionResults;
53 
54  /** the OptionHandler tester */
55  protected CheckOptionHandler m_OptionTester;
56 
57  /** for testing GOE stuff */
58  protected CheckGOE m_GOETester;
59 
60  /**
61   * Constructs the <code>AbstractTokenizerTest</code>. Called by subclasses.
62   *
63   * @param name the name of the test class
64   */
65  public AbstractTokenizerTest(String name) { 
66    super(name); 
67  }
68
69  /**
70   * returns the data to use in the tests
71   *
72   * @return            the data to use in the tests
73   */
74  protected String[] getData() {
75    return new String[]{
76      "Humpty Dumpty was sitting, with his legs crossed like a Turk, on the top of a high wall -- such a narrow one that Alice quite wondered how he could keep his balance -- and, as his eyes were steadily fixed in the opposite direction, and he didn't take the least notice of her, she thought he must be a stuffed figure, after all.",
77      "The planet Mars, I scarcely need remind the reader, revolves about the sun at a mean distance of 140,000,000 miles, and the light and heat it receives from the sun is barely half of that received by this world.",
78      "I've studied now Philosophy And Jurisprudence, Medicine, And even, alas! Theology All through and through with ardour keen! Here now I stand, poor fool, and see I'm just as wise as formerly."
79    };
80  }
81 
82  /**
83   * Configures the CheckOptionHandler uses for testing the option handling.
84   * Sets the tokenizer returned from the getTokenizer() method if that can
85   * handle options.
86   *
87   * @return    the fully configured CheckOptionHandler
88   * @see       #getTokenizer()
89   */
90  protected CheckOptionHandler getOptionTester() {
91    CheckOptionHandler          result;
92   
93    result = new CheckOptionHandler();
94    if (getTokenizer() instanceof OptionHandler)
95      result.setOptionHandler((OptionHandler) getTokenizer());
96    else
97      result.setOptionHandler(null);
98    result.setUserOptions(new String[0]);
99    result.setSilent(true);
100   
101    return result;
102  }
103 
104  /**
105   * Configures the CheckGOE used for testing GOE stuff.
106   * Sets the Tokenizer returned from the getTokenizer() method.
107   *
108   * @return    the fully configured CheckGOE
109   * @see       #getTokenizer()
110   */
111  protected CheckGOE getGOETester() {
112    CheckGOE            result;
113   
114    result = new CheckGOE();
115    result.setObject(getTokenizer());
116    result.setSilent(true);
117   
118    return result;
119  }
120 
121  /**
122   * Called by JUnit before each test method. This implementation creates
123   * the default tokenizer to test and loads a test set of Instances.
124   *
125   * @exception Exception if an error occurs reading the example instances.
126   */
127  protected void setUp() throws Exception {
128    m_Tokenizer         = getTokenizer();
129    m_OptionTester      = getOptionTester();
130    m_GOETester         = getGOETester();
131    m_Data              = getData();
132    m_RegressionResults = new FastVector[m_Data.length];
133  }
134
135  /** Called by JUnit after each test method */
136  protected void tearDown() {
137    m_Tokenizer         = null;
138    m_OptionTester      = null;
139    m_GOETester         = null;
140    m_Data              = null;
141    m_RegressionResults = null;
142  }
143
144  /**
145   * Used to create an instance of a specific tokenizer.
146   *
147   * @return a suitably configured <code>Tokenizer</code> value
148   */
149  public abstract Tokenizer getTokenizer();
150
151  /**
152   * tests whether the scheme declares a serialVersionUID.
153   */
154  public void testSerialVersionUID() {
155    boolean     result;
156
157    result = !SerializationHelper.needsUID(m_Tokenizer.getClass());
158
159    if (!result)
160      fail("Doesn't declare serialVersionUID!");
161  }
162
163  /**
164   * tests whether the tokenizer correctly initializes in the
165   * buildTokenizer method
166   */
167  public void testBuildInitialization() {
168    boolean             result;
169    int                 i;
170    int                 n;
171    String[][][]        processed;
172    String              msg;
173   
174    // process data twice
175    processed = new String[2][m_Data.length][];
176    for (n = 0; n < 2; n++) {
177      for (i = 0; i < m_Data.length; i++) {
178        try {
179          processed[n][i] = Tokenizer.tokenize(m_Tokenizer, new String[]{m_Data[i]});
180        }
181        catch (Exception e) {
182          processed[n][i] = new String[0];
183        }
184      }
185    }
186   
187    // was the same data produced?
188    result = true;
189    msg    = "";
190    for (i = 0; i < m_Data.length; i++) {
191      if (processed[0].length == processed[1].length) {
192        for (n = 0; n < processed[0][i].length; n++) {
193          if (!processed[0][i][n].equals(processed[1][i][n])) {
194            result = false;
195            msg    = "different substrings";
196            break;
197          }
198        }
199      }
200      else {
201        result = false;
202        msg    = "different number of substrings";
203        break;
204      }
205    }
206
207    if (!result)
208      fail("Incorrect build initialization (" + msg + ")!");
209  }
210
211  /**
212   * Runs the tokenizer over the given string and returns the generated
213   * tokens.
214   *
215   * @param s           the string to tokenize
216   * @return            a <code>FastVector</code> containing the tokens.
217   * @throws Exception  if tokenization fails
218   */
219  protected FastVector useTokenizer(String s) throws Exception {
220    String[]    tokens;
221    FastVector  result;
222    int         i;
223   
224    tokens = Tokenizer.tokenize(m_Tokenizer, new String[]{s});
225   
226    result = new FastVector();
227    for (i = 0; i < tokens.length; i++)
228      result.addElement(tokens[i]);
229   
230    return result;
231     
232  }
233
234  /**
235   * Returns a string containing all the tokens.
236   *
237   * @param tokens      a <code>FastVector</code> containing the tokens
238   * @return            a <code>String</code> representing the vector of tokens.
239   */
240  protected String predictionsToString(FastVector tokens) {
241    StringBuffer sb = new StringBuffer();
242   
243    sb.append(tokens.size()).append(" tokens\n");
244    for (int i = 0; i < tokens.size(); i++)
245      sb.append(tokens.elementAt(i)).append('\n');
246   
247    return sb.toString();
248  }
249
250  /**
251   * Runs a regression test -- this checks that the output of the tested
252   * object matches that in a reference version. When this test is
253   * run without any pre-existing reference output, the reference version
254   * is created.
255   */
256  public void testRegression() {
257    int         i;
258    boolean     succeeded;
259    Regression  reg;
260   
261    reg       = new Regression(this.getClass());
262    succeeded = false;
263   
264    for (i = 0; i < m_Data.length; i++) {
265      try {
266        m_RegressionResults[i] = useTokenizer(m_Data[i]);
267        succeeded = true;
268        reg.println(predictionsToString(m_RegressionResults[i]));
269      }
270      catch (Exception e) {
271        m_RegressionResults[i] = null;
272      }
273    }
274   
275    if (!succeeded) {
276      fail("Problem during regression testing: no successful tokens generated for any string");
277    }
278
279    try {
280      String diff = reg.diff();
281      if (diff == null) {
282        System.err.println("Warning: No reference available, creating."); 
283      } else if (!diff.equals("")) {
284        fail("Regression test failed. Difference:\n" + diff);
285      }
286    } 
287    catch (java.io.IOException ex) {
288      fail("Problem during regression testing.\n" + ex);
289    }
290  }
291 
292  /**
293   * tests the listing of the options
294   */
295  public void testListOptions() {
296    if (m_OptionTester.getOptionHandler() != null) {
297      if (!m_OptionTester.checkListOptions())
298        fail("Options cannot be listed via listOptions.");
299    }
300  }
301 
302  /**
303   * tests the setting of the options
304   */
305  public void testSetOptions() {
306    if (m_OptionTester.getOptionHandler() != null) {
307      if (!m_OptionTester.checkSetOptions())
308        fail("setOptions method failed.");
309    }
310  }
311 
312  /**
313   * tests whether the default settings are processed correctly
314   */
315  public void testDefaultOptions() {
316    if (m_OptionTester.getOptionHandler() != null) {
317      if (!m_OptionTester.checkDefaultOptions())
318        fail("Default options were not processed correctly.");
319    }
320  }
321 
322  /**
323   * tests whether there are any remaining options
324   */
325  public void testRemainingOptions() {
326    if (m_OptionTester.getOptionHandler() != null) {
327      if (!m_OptionTester.checkRemainingOptions())
328        fail("There were 'left-over' options.");
329    }
330  }
331 
332  /**
333   * tests the whether the user-supplied options stay the same after setting.
334   * getting, and re-setting again.
335   *
336   * @see       #getOptionTester()
337   */
338  public void testCanonicalUserOptions() {
339    if (m_OptionTester.getOptionHandler() != null) {
340      if (!m_OptionTester.checkCanonicalUserOptions())
341        fail("setOptions method failed");
342    }
343  }
344 
345  /**
346   * tests the resetting of the options to the default ones
347   */
348  public void testResettingOptions() {
349    if (m_OptionTester.getOptionHandler() != null) {
350      if (!m_OptionTester.checkSetOptions())
351        fail("Resetting of options failed");
352    }
353  }
354 
355  /**
356   * tests for a globalInfo method
357   */
358  public void testGlobalInfo() {
359    if (!m_GOETester.checkGlobalInfo())
360      fail("No globalInfo method");
361  }
362 
363  /**
364   * tests the tool tips
365   */
366  public void testToolTips() {
367    if (!m_GOETester.checkToolTips())
368      fail("Tool tips inconsistent");
369  }
370}
Note: See TracBrowser for help on using the repository browser.