source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/Remove.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.6 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Remove.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.DenseInstance;
32import weka.core.Instances;
33import weka.core.Option;
34import weka.core.OptionHandler;
35import weka.core.Range;
36import weka.core.RevisionUtils;
37import weka.core.SparseInstance;
38import weka.core.Utils;
39import weka.core.Capabilities.Capability;
40import weka.filters.Filter;
41import weka.filters.StreamableFilter;
42import weka.filters.UnsupervisedFilter;
43
44import java.util.Enumeration;
45import java.util.Vector;
46
47/**
48 <!-- globalinfo-start -->
49 * A filter that removes a range of attributes from the dataset.
50 * <p/>
51 <!-- globalinfo-end -->
52 *
53 <!-- options-start -->
54 * Valid options are: <p/>
55 *
56 * <pre> -R &lt;index1,index2-index4,...&gt;
57 *  Specify list of columns to delete. First and last are valid
58 *  indexes. (default none)</pre>
59 *
60 * <pre> -V
61 *  Invert matching sense (i.e. only keep specified columns)</pre>
62 *
63 <!-- options-end -->
64 *
65 * @author Len Trigg (trigg@cs.waikato.ac.nz)
66 * @version $Revision: 6128 $
67 */
68public class Remove 
69  extends Filter
70  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
71
72  /** for serialization */
73  static final long serialVersionUID = 5011337331921522847L;
74 
75  /** Stores which columns to select as a funky range */
76  protected Range m_SelectCols = new Range();
77
78  /**
79   * Stores the indexes of the selected attributes in order, once the
80   * dataset is seen
81   */
82  protected int [] m_SelectedAttributes;
83
84  /**
85   * Constructor so that we can initialize the Range variable properly.
86   */
87  public Remove() {
88       
89    m_SelectCols.setInvert(true);
90  }
91
92  /**
93   * Returns an enumeration describing the available options.
94   *
95   * @return an enumeration of all the available options.
96   */
97  public Enumeration listOptions() {
98
99    Vector newVector = new Vector(2);
100
101    newVector.addElement(new Option(
102              "\tSpecify list of columns to delete. First and last are valid\n"
103              +"\tindexes. (default none)",
104              "R", 1, "-R <index1,index2-index4,...>"));
105    newVector.addElement(new Option(
106              "\tInvert matching sense (i.e. only keep specified columns)",
107              "V", 0, "-V"));
108
109    return newVector.elements();
110  }
111
112  /**
113   * Parses a given list of options. <p/>
114   *
115   <!-- options-start -->
116   * Valid options are: <p/>
117   *
118   * <pre> -R &lt;index1,index2-index4,...&gt;
119   *  Specify list of columns to delete. First and last are valid
120   *  indexes. (default none)</pre>
121   *
122   * <pre> -V
123   *  Invert matching sense (i.e. only keep specified columns)</pre>
124   *
125   <!-- options-end -->
126   *
127   * @param options the list of options as an array of strings
128   * @throws Exception if an option is not supported
129   */
130  public void setOptions(String[] options) throws Exception {
131
132    String deleteList = Utils.getOption('R', options);
133    if (deleteList.length() != 0) {
134      setAttributeIndices(deleteList);
135    }
136    setInvertSelection(Utils.getFlag('V', options));
137   
138    if (getInputFormat() != null) {
139      setInputFormat(getInputFormat());
140    }
141  }
142
143  /**
144   * Gets the current settings of the filter.
145   *
146   * @return an array of strings suitable for passing to setOptions
147   */
148  public String [] getOptions() {
149
150    String [] options = new String [3];
151    int current = 0;
152
153    if (getInvertSelection()) {
154      options[current++] = "-V";
155    }
156    if (!getAttributeIndices().equals("")) {
157      options[current++] = "-R"; options[current++] = getAttributeIndices();
158    }
159
160    while (current < options.length) {
161      options[current++] = "";
162    }
163    return options;
164  }
165
166  /**
167   * Returns the Capabilities of this filter.
168   *
169   * @return            the capabilities of this object
170   * @see               Capabilities
171   */
172  public Capabilities getCapabilities() {
173    Capabilities result = super.getCapabilities();
174    result.disableAll();
175
176    // attributes
177    result.enableAllAttributes();
178    result.enable(Capability.MISSING_VALUES);
179   
180    // class
181    result.enableAllClasses();
182    result.enable(Capability.MISSING_CLASS_VALUES);
183    result.enable(Capability.NO_CLASS);
184   
185    return result;
186  }
187
188  /**
189   * Sets the format of the input instances.
190   *
191   * @param instanceInfo an Instances object containing the input instance
192   * structure (any instances contained in the object are ignored - only the
193   * structure is required).
194   * @return true if the outputFormat may be collected immediately
195   * @throws Exception if the format couldn't be set successfully
196   */
197  public boolean setInputFormat(Instances instanceInfo) throws Exception {
198
199    super.setInputFormat(instanceInfo);
200   
201    m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
202
203    // Create the output buffer
204    FastVector attributes = new FastVector();
205    int outputClass = -1;
206    m_SelectedAttributes = m_SelectCols.getSelection();
207    for (int i = 0; i < m_SelectedAttributes.length; i++) {
208      int current = m_SelectedAttributes[i];
209      if (instanceInfo.classIndex() == current) {
210        outputClass = attributes.size();
211      }
212      Attribute keep = (Attribute)instanceInfo.attribute(current).copy();
213      attributes.addElement(keep);
214    }
215    initInputLocators(instanceInfo, m_SelectedAttributes);
216    Instances outputFormat = new Instances(instanceInfo.relationName(),
217                                           attributes, 0); 
218    outputFormat.setClassIndex(outputClass);
219    setOutputFormat(outputFormat);
220    return true;
221  }
222 
223
224  /**
225   * Input an instance for filtering. Ordinarily the instance is processed
226   * and made available for output immediately. Some filters require all
227   * instances be read before producing output.
228   *
229   * @param instance the input instance
230   * @return true if the filtered instance may now be
231   * collected with output().
232   * @throws IllegalStateException if no input structure has been defined.
233   */
234  public boolean input(Instance instance) {
235
236    if (getInputFormat() == null) {
237      throw new IllegalStateException("No input instance format defined");
238    }
239    if (m_NewBatch) {
240      resetQueue();
241      m_NewBatch = false;
242    }
243
244    if (getOutputFormat().numAttributes() == 0) {
245      return false;
246    }
247    double [] vals = new double[getOutputFormat().numAttributes()];
248    for (int i = 0; i < m_SelectedAttributes.length; i++) {
249      int current = m_SelectedAttributes[i];
250      vals[i] = instance.value(current);
251    }
252    Instance inst = null;
253    if (instance instanceof SparseInstance) {
254      inst = new SparseInstance(instance.weight(), vals);
255    } else {
256      inst = new DenseInstance(instance.weight(), vals);
257    }
258    inst.setDataset(getOutputFormat());
259    copyValues(inst, false, instance.dataset(), getOutputFormat());
260    inst.setDataset(getOutputFormat());
261    push(inst);
262    return true;
263  }
264
265  /**
266   * Returns a string describing this filter
267   *
268   * @return a description of the filter suitable for
269   * displaying in the explorer/experimenter gui
270   */
271  public String globalInfo() {
272
273    return "A filter that removes a range of"
274      + " attributes from the dataset.";
275  }
276
277  /**
278   * Returns the tip text for this property
279   *
280   * @return tip text for this property suitable for
281   * displaying in the explorer/experimenter gui
282   */
283  public String invertSelectionTipText() {
284
285    return "Determines whether action is to select or delete."
286      + " If set to true, only the specified attributes will be kept;"
287      + " If set to false, specified attributes will be deleted.";
288  }
289
290  /**
291   * Get whether the supplied columns are to be removed or kept
292   *
293   * @return true if the supplied columns will be kept
294   */
295  public boolean getInvertSelection() {
296
297    return !m_SelectCols.getInvert();
298  }
299
300  /**
301   * Set whether selected columns should be removed or kept. If true the
302   * selected columns are kept and unselected columns are deleted. If false
303   * selected columns are deleted and unselected columns are kept.
304   *
305   * @param invert the new invert setting
306   */
307  public void setInvertSelection(boolean invert) {
308
309    m_SelectCols.setInvert(!invert);
310  }
311
312  /**
313   * Returns the tip text for this property
314   *
315   * @return tip text for this property suitable for
316   * displaying in the explorer/experimenter gui
317   */
318  public String attributeIndicesTipText() {
319
320    return "Specify range of attributes to act on."
321      + " This is a comma separated list of attribute indices, with"
322      + " \"first\" and \"last\" valid values. Specify an inclusive"
323      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
324  }
325
326  /**
327   * Get the current range selection.
328   *
329   * @return a string containing a comma separated list of ranges
330   */
331  public String getAttributeIndices() {
332
333    return m_SelectCols.getRanges();
334  }
335
336  /**
337   * Set which attributes are to be deleted (or kept if invert is true)
338   *
339   * @param rangeList a string representing the list of attributes.  Since
340   * the string will typically come from a user, attributes are indexed from
341   * 1. <br>
342   * eg: first-3,5,6-last
343   */
344  public void setAttributeIndices(String rangeList) {
345
346    m_SelectCols.setRanges(rangeList);
347  }
348
349  /**
350   * Set which attributes are to be deleted (or kept if invert is true)
351   *
352   * @param attributes an array containing indexes of attributes to select.
353   * Since the array will typically come from a program, attributes are indexed
354   * from 0.
355   */
356  public void setAttributeIndicesArray(int [] attributes) {
357   
358    setAttributeIndices(Range.indicesToRangeList(attributes));
359  }
360 
361  /**
362   * Returns the revision string.
363   *
364   * @return            the revision
365   */
366  public String getRevision() {
367    return RevisionUtils.extract("$Revision: 6128 $");
368  }
369
370  /**
371   * Main method for testing this class.
372   *
373   * @param argv should contain arguments to the filter: use -h for help
374   */
375  public static void main(String [] argv) {
376    runFilter(new Remove(), argv);
377  }
378}
Note: See TracBrowser for help on using the repository browser.