source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/RemoveByName.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 8.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * RemoveByName.java
19 * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import java.util.Enumeration;
26import java.util.Vector;
27
28import weka.core.Capabilities;
29import weka.core.Instance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.RevisionUtils;
33import weka.core.Utils;
34import weka.filters.SimpleStreamFilter;
35
36/**
37 <!-- globalinfo-start -->
38 * Removes attributes based on a regular expression matched against their names.
39 * <p/>
40 <!-- globalinfo-end -->
41 *
42 <!-- options-start -->
43 * Valid options are: <p/>
44 *
45 * <pre> -D
46 *  Turns on output of debugging information.</pre>
47 *
48 * <pre> -E &lt;regular expression&gt;
49 *  The regular expression to match the attribute names against.
50 *  (default: ^.*id$)</pre>
51 *
52 * <pre> -V
53 *  Flag for inverting the matching sense. If set, attributes are kept
54 *  instead of deleted.
55 *  (default: off)</pre>
56 *
57 <!-- options-end -->
58 *
59 * @author FracPete (fracpete at waikato dot ac dot nz)
60 * @version $Revision: 6076 $
61 */
62public class RemoveByName
63  extends SimpleStreamFilter {
64
65  /** for serialization. */
66  private static final long serialVersionUID = -3335106965521265631L;
67
68  /** the default expression. */
69  public final static String DEFAULT_EXPRESSION = "^.*id$";
70 
71  /** the regular expression for selecting the attributes by name. */
72  protected String m_Expression = DEFAULT_EXPRESSION;
73 
74  /** whether to invert the matching sense. */
75  protected boolean m_InvertSelection;
76
77  /** the Remove filter used internally for removing the attributes. */
78  protected Remove m_Remove;
79 
80  /**
81   * Returns a string describing this classifier.
82   *
83   * @return      a description of the classifier suitable for
84   *              displaying in the explorer/experimenter gui
85   */
86  public String globalInfo() {
87    return 
88        "Removes attributes based on a regular expression matched against "
89      + "their names.";
90  }
91
92  /**
93   * Gets an enumeration describing the available options.
94   *
95   * @return an enumeration of all the available options.
96   */
97  public Enumeration listOptions() {
98    Vector      result;
99    Enumeration enm;
100
101    result = new Vector();
102
103    enm = super.listOptions();
104    while (enm.hasMoreElements())
105      result.addElement(enm.nextElement());
106
107    result.addElement(new Option(
108        "\tThe regular expression to match the attribute names against.\n"
109        + "\t(default: " + DEFAULT_EXPRESSION + ")",
110        "E", 1, "-E <regular expression>"));
111
112    result.addElement(new Option(
113        "\tFlag for inverting the matching sense. If set, attributes are kept\n"
114        + "\tinstead of deleted.\n"
115        + "\t(default: off)",
116        "V", 0, "-V"));
117
118    return result.elements();
119  }
120
121  /**
122   * returns the options of the current setup.
123   *
124   * @return      the current options
125   */
126  public String[] getOptions() {
127    int                 i;
128    Vector<String>      result;
129    String[]            options;
130
131    result = new Vector();
132    options = super.getOptions();
133    for (i = 0; i < options.length; i++)
134      result.add(options[i]);
135
136    result.add("-E");
137    result.add("" + getExpression());
138
139    if (getInvertSelection())
140      result.add("-V");
141
142    return (String[]) result.toArray(new String[result.size()]);         
143  }
144
145  /**
146   * Parses the options for this object. <p/>
147   *
148   <!-- options-start -->
149   * Valid options are: <p/>
150   *
151   * <pre> -D
152   *  Turns on output of debugging information.</pre>
153   *
154   * <pre> -E &lt;regular expression&gt;
155   *  The regular expression to match the attribute names against.
156   *  (default: ^.*id$)</pre>
157   *
158   * <pre> -V
159   *  Flag for inverting the matching sense. If set, attributes are kept
160   *  instead of deleted.
161   *  (default: off)</pre>
162   *
163   <!-- options-end -->
164   *
165   * @param options     the options to use
166   * @throws Exception  if the option setting fails
167   */
168  public void setOptions(String[] options) throws Exception {
169    String      tmpStr;
170
171    super.setOptions(options);
172
173    tmpStr = Utils.getOption("E", options);
174    if (tmpStr.length() != 0)
175      setExpression(tmpStr);
176    else
177      setExpression(DEFAULT_EXPRESSION);
178
179    setInvertSelection(Utils.getFlag("V", options));
180  }
181
182  /**
183   * Sets the regular expression to match the attribute names against.
184   *
185   * @param value       the regular expression
186   */
187  public void setExpression(String value) {
188    m_Expression = value;
189  }
190
191  /**
192   * Returns the regular expression in use.
193   *
194   * @return            the regular expression
195   */
196  public String getExpression() {
197    return m_Expression;
198  }
199
200  /**
201   * Returns the tip text for this property.
202   *
203   * @return            tip text for this property suitable for
204   *                    displaying in the explorer/experimenter gui
205   */
206  public String expressionTipText() {
207    return "The regular expression to match the attribute names against.";
208  }
209
210  /**
211   * Set whether selected columns should be removed or kept. If true the
212   * selected columns are kept and unselected columns are deleted. If false
213   * selected columns are deleted and unselected columns are kept.
214   *
215   * @param value       the new invert setting
216   */
217  public void setInvertSelection(boolean value) {
218    m_InvertSelection = value;
219  }
220
221  /**
222   * Get whether the supplied columns are to be removed or kept.
223   *
224   * @return            true if the supplied columns will be kept
225   */
226  public boolean getInvertSelection() {
227    return m_InvertSelection;
228  }
229
230  /**
231   * Returns the tip text for this property.
232   *
233   * @return            tip text for this property suitable for
234   *                    displaying in the explorer/experimenter gui
235   */
236  public String invertSelectionTipText() {
237    return "Determines whether action is to select or delete."
238      + " If set to true, only the specified attributes will be kept;"
239      + " If set to false, specified attributes will be deleted.";
240  }
241 
242  /**
243   * Determines the output format based on the input format and returns
244   * this. In case the output format cannot be returned immediately, i.e.,
245   * immediateOutputFormat() returns false, then this method will be called
246   * from batchFinished().
247   *
248   * @param inputFormat the input format to base the output format on
249   * @return            the output format
250   * @throws Exception  in case the determination goes wrong
251   */
252  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
253    Vector<Integer>     indices;
254    int[]               attributes;
255    int                 i;
256   
257    // determine indices
258    indices = new Vector<Integer>();
259    for (i = 0; i < inputFormat.numAttributes(); i++) {
260      // skip class
261      if (i == inputFormat.classIndex())
262        continue;
263      if (inputFormat.attribute(i).name().matches(m_Expression))
264        indices.add(i);
265    }
266    attributes = new int[indices.size()];
267    for (i = 0; i < indices.size(); i++)
268      attributes[i] = indices.get(i);
269   
270    m_Remove = new Remove();
271    m_Remove.setAttributeIndicesArray(attributes);
272    m_Remove.setInvertSelection(getInvertSelection());
273    m_Remove.setInputFormat(inputFormat);
274   
275    return m_Remove.getOutputFormat();
276  }
277
278  /**
279   * Returns the Capabilities of this filter.
280   *
281   * @return            the capabilities of this object
282   * @see               Capabilities
283   */
284  public Capabilities getCapabilities() {
285    Capabilities result;
286   
287    result = new Remove().getCapabilities();
288    result.setOwner(this);
289   
290    return result;
291  }
292 
293  /**
294   * processes the given instance (may change the provided instance) and
295   * returns the modified version.
296   *
297   * @param instance    the instance to process
298   * @return            the modified data
299   * @throws Exception  in case the processing goes wrong
300   */
301  protected Instance process(Instance instance) throws Exception {
302    m_Remove.input(instance);
303    return m_Remove.output();
304  }
305 
306  /**
307   * Returns the revision string.
308   *
309   * @return            the revision
310   */
311  public String getRevision() {
312    return RevisionUtils.extract("$Revision: 6076 $");
313  }
314
315  /**
316   * runs the filter with the given arguments.
317   *
318   * @param args      the commandline arguments
319   */
320  public static void main(String[] args) {
321    runFilter(new RemoveByName(), args);
322  }
323}
Note: See TracBrowser for help on using the repository browser.