Context Navigation

WordTokenizer.java

Last change on this file was 29, checked in by gnappo, 15 years ago
Taggata versione per la demo e aggiunto branch.
File size: 3.1 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* WordTokenizer.java
19	* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20	*/
21
22	package weka.core.tokenizers;
23
24	import weka.core.RevisionUtils;
25
26	import java.util.StringTokenizer;
27
28	/**
29	<!-- globalinfo-start -->
30	* A simple tokenizer that is using the java.util.StringTokenizer class to tokenize the strings.
31	* <p/>
32	<!-- globalinfo-end -->
33	*
34	<!-- options-start -->
35	* Valid options are: <p/>
36	*
37	* <pre> -delimiters <value>
38	* The delimiters to use
39	* (default ' \r\n\t.,;:'"()?!').</pre>
40	*
41	<!-- options-end -->
42	*
43	* @author FracPete (fracpete at waikato dot ac dot nz)
44	* @version $Revision: 5953 $
45	*/
46	public class WordTokenizer
47	extends CharacterDelimitedTokenizer {
48
49	/** for serialization */
50	private static final long serialVersionUID = -930893034037880773L;
51
52	/** the actual tokenizer */
53	protected transient StringTokenizer m_Tokenizer;
54
55	/**
56	* Returns a string describing the stemmer
57	*
58	* @return a description suitable for displaying in the
59	* explorer/experimenter gui
60	*/
61	public String globalInfo() {
62	return
63	"A simple tokenizer that is using the java.util.StringTokenizer "
64	+ "class to tokenize the strings.";
65	}
66
67	/**
68	* Tests if this enumeration contains more elements.
69	*
70	* @return true if and only if this enumeration object contains
71	* at least one more element to provide; false otherwise.
72	*/
73	public boolean hasMoreElements() {
74	return m_Tokenizer.hasMoreElements();
75	}
76
77	/**
78	* Returns the next element of this enumeration if this enumeration object
79	* has at least one more element to provide.
80	*
81	* @return the next element of this enumeration.
82	*/
83	public Object nextElement() {
84	return m_Tokenizer.nextElement();
85	}
86
87	/**
88	* Sets the string to tokenize. Tokenization happens immediately.
89	*
90	* @param s the string to tokenize
91	*/
92	public void tokenize(String s) {
93	m_Tokenizer = new StringTokenizer(s, getDelimiters());
94	}
95
96	/**
97	* Returns the revision string.
98	*
99	* @return the revision
100	*/
101	public String getRevision() {
102	return RevisionUtils.extract("$Revision: 5953 $");
103	}
104
105	/**
106	* Runs the tokenizer with the given options and strings to tokenize.
107	* The tokens are printed to stdout.
108	*
109	* @param args the commandline options and strings to tokenize
110	*/
111	public static void main(String[] args) {
112	runTokenizer(new WordTokenizer(), args);
113	}
114	}
115

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/core/tokenizers/WordTokenizer.java

Download in other formats: