source: tags/MetisMQIDemo/src/test/java/weka/core/tokenizers/NGramTokenizerTest.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 2.5 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
19 */
20
21package weka.core.tokenizers;
22
23import junit.framework.Test;
24import junit.framework.TestSuite;
25
26/**
27 * Tests NGramTokenizer. Run from the command line with:<p>
28 * java weka.core.tokenizers.NGramTokenizerTest
29 *
30 * @author FracPete (fracpete at waikato dot ac dot nz)
31 * @version $Revision: 1.1 $
32 */
33public class NGramTokenizerTest
34  extends AbstractTokenizerTest {
35
36  public NGramTokenizerTest(String name) {
37    super(name);
38  }
39
40  /** Creates a default NGramTokenizer */
41  public Tokenizer getTokenizer() {
42    return new NGramTokenizer();
43  }
44
45  /**
46   * tests the number of generated tokens
47   */
48  public void testNumberOfGeneratedTokens() {
49    String      s;
50    String[]    result;
51   
52    s = "HOWEVER, the egg only got larger and larger, and more and more human";
53
54    // only 1-grams
55    try {
56      result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "1", "-max", "1", s});
57      assertEquals("number of tokens differ (1)", 13, result.length);
58    }
59    catch (Exception e) {
60      fail("Error tokenizing string '" + s + "'!");
61    }
62
63    // only 2-grams
64    try {
65      result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "2", "-max", "2", s});
66      assertEquals("number of tokens differ (2)", 12, result.length);
67    }
68    catch (Exception e) {
69      fail("Error tokenizing string '" + s + "'!");
70    }
71
72    // 1 to 3-grams
73    try {
74      result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "1", "-max", "3", s});
75      assertEquals("number of tokens differ (3)", 36, result.length);
76    }
77    catch (Exception e) {
78      fail("Error tokenizing string '" + s + "'!");
79    }
80  }
81
82  public static Test suite() {
83    return new TestSuite(NGramTokenizerTest.class);
84  }
85
86  public static void main(String[] args){
87    junit.textui.TestRunner.run(suite());
88  }
89}
Note: See TracBrowser for help on using the repository browser.