Context Navigation

NGramTokenizerTest.java

Last change on this file was 29, checked in by gnappo, 15 years ago
Taggata versione per la demo e aggiunto branch.
File size: 2.5 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
19	*/
20
21	package weka.core.tokenizers;
22
23	import junit.framework.Test;
24	import junit.framework.TestSuite;
25
26	/**
27	* Tests NGramTokenizer. Run from the command line with:<p>
28	* java weka.core.tokenizers.NGramTokenizerTest
29	*
30	* @author FracPete (fracpete at waikato dot ac dot nz)
31	* @version $Revision: 1.1 $
32	*/
33	public class NGramTokenizerTest
34	extends AbstractTokenizerTest {
35
36	public NGramTokenizerTest(String name) {
37	super(name);
38	}
39
40	/** Creates a default NGramTokenizer */
41	public Tokenizer getTokenizer() {
42	return new NGramTokenizer();
43	}
44
45	/**
46	* tests the number of generated tokens
47	*/
48	public void testNumberOfGeneratedTokens() {
49	String s;
50	String[] result;
51
52	s = "HOWEVER, the egg only got larger and larger, and more and more human";
53
54	// only 1-grams
55	try {
56	result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "1", "-max", "1", s});
57	assertEquals("number of tokens differ (1)", 13, result.length);
58	}
59	catch (Exception e) {
60	fail("Error tokenizing string '" + s + "'!");
61	}
62
63	// only 2-grams
64	try {
65	result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "2", "-max", "2", s});
66	assertEquals("number of tokens differ (2)", 12, result.length);
67	}
68	catch (Exception e) {
69	fail("Error tokenizing string '" + s + "'!");
70	}
71
72	// 1 to 3-grams
73	try {
74	result = Tokenizer.tokenize(m_Tokenizer, new String[]{"-min", "1", "-max", "3", s});
75	assertEquals("number of tokens differ (3)", 36, result.length);
76	}
77	catch (Exception e) {
78	fail("Error tokenizing string '" + s + "'!");
79	}
80	}
81
82	public static Test suite() {
83	return new TestSuite(NGramTokenizerTest.class);
84	}
85
86	public static void main(String[] args){
87	junit.textui.TestRunner.run(suite());
88	}
89	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: tags/MetisMQIDemo/src/test/java/weka/core/tokenizers/NGramTokenizerTest.java

Download in other formats: