| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | package weka.core.tokenizers; |
|---|
| 22 | |
|---|
| 23 | import junit.framework.Test; |
|---|
| 24 | import junit.framework.TestSuite; |
|---|
| 25 | |
|---|
| 26 | /** |
|---|
| 27 | * Tests AlphabeticTokenizer. Run from the command line with:<p> |
|---|
| 28 | * java weka.core.tokenizers.AlphabeticTokenizerTest |
|---|
| 29 | * |
|---|
| 30 | * @author FracPete (fracpete at waikato dot ac dot nz) |
|---|
| 31 | * @version $Revision: 1.1 $ |
|---|
| 32 | */ |
|---|
| 33 | public class AlphabeticTokenizerTest |
|---|
| 34 | extends AbstractTokenizerTest { |
|---|
| 35 | |
|---|
| 36 | public AlphabeticTokenizerTest(String name) { |
|---|
| 37 | super(name); |
|---|
| 38 | } |
|---|
| 39 | |
|---|
| 40 | /** Creates a default AlphabeticTokenizer */ |
|---|
| 41 | public Tokenizer getTokenizer() { |
|---|
| 42 | return new AlphabeticTokenizer(); |
|---|
| 43 | } |
|---|
| 44 | |
|---|
| 45 | /** |
|---|
| 46 | * tests the number of generated tokens |
|---|
| 47 | */ |
|---|
| 48 | public void testNumberOfGeneratedTokens() { |
|---|
| 49 | String s; |
|---|
| 50 | String[] result; |
|---|
| 51 | |
|---|
| 52 | // no numbers included |
|---|
| 53 | s = "HOWEVER, the egg only got larger and larger, and more and more human"; |
|---|
| 54 | try { |
|---|
| 55 | result = Tokenizer.tokenize(m_Tokenizer, new String[]{s}); |
|---|
| 56 | assertEquals("number of tokens differ (1)", 13, result.length); |
|---|
| 57 | } |
|---|
| 58 | catch (Exception e) { |
|---|
| 59 | fail("Error tokenizing string '" + s + "'!"); |
|---|
| 60 | } |
|---|
| 61 | |
|---|
| 62 | // numbers included |
|---|
| 63 | s = "The planet Mars, I scarcely need remind the reader, revolves about the sun at a mean distance of 140,000,000 miles"; |
|---|
| 64 | try { |
|---|
| 65 | result = Tokenizer.tokenize(m_Tokenizer, new String[]{s}); |
|---|
| 66 | assertEquals("number of tokens differ (2)", 19, result.length); |
|---|
| 67 | } |
|---|
| 68 | catch (Exception e) { |
|---|
| 69 | fail("Error tokenizing string '" + s + "'!"); |
|---|
| 70 | } |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | public static Test suite() { |
|---|
| 74 | return new TestSuite(AlphabeticTokenizerTest.class); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | public static void main(String[] args){ |
|---|
| 78 | junit.textui.TestRunner.run(suite()); |
|---|
| 79 | } |
|---|
| 80 | } |
|---|