source: branches/MetisMQI/src/main/java/weka/core/tokenizers/WordTokenizer.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 3.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * WordTokenizer.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.tokenizers;
23
24import weka.core.RevisionUtils;
25
26import java.util.StringTokenizer;
27
28/**
29 <!-- globalinfo-start -->
30 * A simple tokenizer that is using the java.util.StringTokenizer class to tokenize the strings.
31 * <p/>
32 <!-- globalinfo-end -->
33 *
34 <!-- options-start -->
35 * Valid options are: <p/>
36 *
37 * <pre> -delimiters &lt;value&gt;
38 *  The delimiters to use
39 *  (default ' \r\n\t.,;:'"()?!').</pre>
40 *
41 <!-- options-end -->
42 *
43 * @author  FracPete (fracpete at waikato dot ac dot nz)
44 * @version $Revision: 5953 $
45 */
46public class WordTokenizer
47  extends CharacterDelimitedTokenizer {
48
49  /** for serialization */
50  private static final long serialVersionUID = -930893034037880773L;
51 
52  /** the actual tokenizer */
53  protected transient StringTokenizer m_Tokenizer;
54 
55  /**
56   * Returns a string describing the stemmer
57   *
58   * @return            a description suitable for displaying in the
59   *                    explorer/experimenter gui
60   */
61  public String globalInfo() {
62    return 
63        "A simple tokenizer that is using the java.util.StringTokenizer "
64      + "class to tokenize the strings.";
65  }
66
67  /**
68   * Tests if this enumeration contains more elements.
69   *
70   * @return            true if and only if this enumeration object contains
71   *                    at least one more element to provide; false otherwise.
72   */
73  public boolean hasMoreElements() {
74    return m_Tokenizer.hasMoreElements();
75  }
76
77  /**
78   * Returns the next element of this enumeration if this enumeration object
79   * has at least one more element to provide.
80   *
81   * @return            the next element of this enumeration.
82   */
83  public Object nextElement() {
84    return m_Tokenizer.nextElement();
85  }
86 
87  /**
88   * Sets the string to tokenize. Tokenization happens immediately.
89   *
90   * @param s           the string to tokenize
91   */
92  public void tokenize(String s) {
93    m_Tokenizer = new StringTokenizer(s, getDelimiters());
94  }
95 
96  /**
97   * Returns the revision string.
98   *
99   * @return            the revision
100   */
101  public String getRevision() {
102    return RevisionUtils.extract("$Revision: 5953 $");
103  }
104
105  /**
106   * Runs the tokenizer with the given options and strings to tokenize.
107   * The tokens are printed to stdout.
108   *
109   * @param args        the commandline options and strings to tokenize
110   */
111  public static void main(String[] args) {
112    runTokenizer(new WordTokenizer(), args);
113  }
114}
115
Note: See TracBrowser for help on using the repository browser.