source: branches/MetisMQI/src/main/java/weka/core/tokenizers/CharacterDelimitedTokenizer.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 3.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * CharacterDelimitedTokenizer.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.tokenizers;
23
24import weka.core.Option;
25import weka.core.RevisionUtils;
26import weka.core.Utils;
27
28import java.util.Enumeration;
29import java.util.Vector;
30
31/**
32 * Abstract superclass for tokenizers that take characters as delimiters.
33 *
34 * @author  fracpete (fracpete at waikato dot ac dot nz)
35 * @version $Revision: 5953 $
36 */
37public abstract class CharacterDelimitedTokenizer
38  extends Tokenizer {
39
40  /** Delimiters used in tokenization */
41  protected String m_Delimiters = " \r\n\t.,;:'\"()?!";
42 
43  /**
44   * Returns an enumeration of all the available options..
45   *
46   * @return            an enumeration of all available options.
47   */
48  public Enumeration listOptions() {
49    Vector<Option>      result;
50   
51    result = new Vector<Option>();
52   
53    result.addElement(new Option(
54        "\tThe delimiters to use\n"
55        + "\t(default ' \\r\\n\\t.,;:'\"()?!').",
56        "delimiters", 1, "-delimiters <value>"));
57   
58    return result.elements();
59  }
60 
61  /**
62   * Gets the current option settings for the OptionHandler.
63   *
64   * @return            the list of current option settings as an array of
65   *                    strings
66   */
67  public String[] getOptions() {
68    Vector<String>      result;
69   
70    result = new Vector<String>();
71   
72    result.add("-delimiters");
73    result.add(getDelimiters());
74   
75    return result.toArray(new String[result.size()]);
76  }
77
78  /**
79   * Sets the OptionHandler's options using the given list. All options
80   * will be set (or reset) during this call (i.e. incremental setting
81   * of options is not possible).
82   *
83   * @param options     the list of options as an array of strings
84   * @throws Exception  if an option is not supported
85   */
86  public void setOptions(String[] options) throws Exception {
87    String      tmpStr;
88   
89    tmpStr = Utils.getOption("delimiters", options);
90    if (tmpStr.length() != 0)
91      setDelimiters(tmpStr);
92    else
93      setDelimiters(" \r\n\t.,;:'\"()?!");
94  }
95
96  /**
97   * Get the value of delimiters (not backquoted).
98   *
99   * @return            Value of delimiters.
100   */
101  public String getDelimiters() {
102    return m_Delimiters;
103  }
104   
105  /**
106   * Set the value of delimiters. For convenienve, the strings
107   * "\r", "\n", "\t", "\'", "\\" get automatically translated into their
108   * character representations '\r', '\n', '\t', '\'', '\\'. This means, one
109   * can either use <code>setDelimiters("\r\n\t\\");</code> or
110   * <code>setDelimiters("\\r\\n\\t\\\\");</code>.
111   *
112   * @param value       Value to assign to delimiters.
113   * @see               Utils#unbackQuoteChars(String)
114   */
115  public void setDelimiters(String value) {
116    m_Delimiters = Utils.unbackQuoteChars(value);
117  }
118
119  /**
120   * Returns the tip text for this property
121   *
122   * @return            tip text for this property suitable for
123   *                    displaying in the explorer/experimenter gui
124   */
125  public String delimitersTipText() {
126    return "Set of delimiter characters to use in tokenizing (\\r, \\n and \\t can be used for carriage-return, line-feed and tab)";
127  }
128}
Note: See TracBrowser for help on using the repository browser.