source: src/main/java/weka/filters/unsupervised/instance/RemovePercentage.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 8.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    RemovePercentage.java
19 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.instance;
25
26import weka.core.Capabilities;
27import weka.core.Instance;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.RevisionUtils;
32import weka.core.Utils;
33import weka.core.Capabilities.Capability;
34import weka.filters.Filter;
35import weka.filters.UnsupervisedFilter;
36
37import java.util.Enumeration;
38import java.util.Vector;
39
40/**
41 <!-- globalinfo-start -->
42 * A filter that removes a given percentage of a dataset.
43 * <p/>
44 <!-- globalinfo-end -->
45 *
46 <!-- options-start -->
47 * Valid options are: <p/>
48 *
49 * <pre> -P &lt;percentage&gt;
50 *  Specifies percentage of instances to select. (default 50)
51 * </pre>
52 *
53 * <pre> -V
54 *  Specifies if inverse of selection is to be output.
55 * </pre>
56 *
57 <!-- options-end -->
58 *
59 * @author Richard Kirkby (eibe@cs.waikato.ac.nz)
60 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
61 * @version $Revision: 5499 $
62*/
63public class RemovePercentage 
64  extends Filter
65  implements UnsupervisedFilter, OptionHandler {
66
67  /** for serialization */
68  static final long serialVersionUID = 2150341191158533133L;
69 
70  /** Percentage of instances to select. */
71  private double m_Percentage = 50;
72
73  /** Indicates if inverse of selection is to be output. */
74  private boolean m_Inverse = false;
75
76  /**
77   * Gets an enumeration describing the available options..
78   *
79   * @return an enumeration of all the available options.
80   */
81  public Enumeration listOptions() {
82
83    Vector newVector = new Vector(2);
84
85    newVector.addElement(new Option(
86              "\tSpecifies percentage of instances to select. (default 50)\n",
87              "P", 1, "-P <percentage>"));
88
89    newVector.addElement(new Option(
90              "\tSpecifies if inverse of selection is to be output.\n",
91              "V", 0, "-V"));
92
93    return newVector.elements();
94  }
95
96  /**
97   * Parses a given list of options. <p/>
98   *
99   <!-- options-start -->
100   * Valid options are: <p/>
101   *
102   * <pre> -P &lt;percentage&gt;
103   *  Specifies percentage of instances to select. (default 50)
104   * </pre>
105   *
106   * <pre> -V
107   *  Specifies if inverse of selection is to be output.
108   * </pre>
109   *
110   <!-- options-end -->
111   *
112   * @param options the list of options as an array of strings
113   * @throws Exception if an option is not supported
114   */
115  public void setOptions(String[] options) throws Exception {
116
117    String percent = Utils.getOption('P', options);
118    if (percent.length() != 0) {
119      setPercentage(Double.parseDouble(percent));
120    } else {
121      setPercentage(50.0);
122    }
123    setInvertSelection(Utils.getFlag('V', options));
124
125    if (getInputFormat() != null) {
126      setInputFormat(getInputFormat());
127    }
128  }
129
130  /**
131   * Gets the current settings of the filter.
132   *
133   * @return an array of strings suitable for passing to setOptions
134   */
135  public String [] getOptions() {
136
137    String [] options = new String [5];
138    int current = 0;
139
140    options[current++] = "-P"; options[current++] = "" + getPercentage();
141    if (getInvertSelection()) {
142      options[current++] = "-V";
143    }
144
145    while (current < options.length) {
146      options[current++] = "";
147    }
148    return options;
149  }
150
151  /**
152   * Returns a string describing this filter
153   *
154   * @return a description of the filter suitable for
155   * displaying in the explorer/experimenter gui
156   */
157  public String globalInfo() {
158
159    return "A filter that removes a given percentage of a dataset.";
160  }
161
162  /**
163   * Returns the tip text for this property
164   *
165   * @return tip text for this property suitable for
166   * displaying in the explorer/experimenter gui
167   */
168  public String percentageTipText() {
169
170    return "The percentage of the data to select.";
171  }
172
173  /**
174   * Gets the percentage of instances to select.
175   *
176   * @return the percentage.
177   */
178  public double getPercentage() {
179
180    return m_Percentage;
181  }
182
183  /**
184   * Sets the percentage of intances to select.
185   *
186   * @param percent the percentage
187   * @throws IllegalArgumentException if percentage out of range
188   */
189  public void setPercentage(double percent) {
190
191    if (percent < 0 || percent > 100) {
192      throw new IllegalArgumentException("Percentage must be between 0 and 100.");
193    }
194    m_Percentage = percent;
195  }
196
197  /**
198   * Returns the tip text for this property
199   *
200   * @return tip text for this property suitable for
201   * displaying in the explorer/experimenter gui
202   */
203  public String invertSelectionTipText() {
204
205    return "Whether to invert the selection.";
206  }
207
208  /**
209   * Gets if selection is to be inverted.
210   *
211   * @return true if the selection is to be inverted
212   */
213  public boolean getInvertSelection() {
214
215    return m_Inverse;
216  }
217
218  /**
219   * Sets if selection is to be inverted.
220   *
221   * @param inverse true if inversion is to be performed
222   */
223  public void setInvertSelection(boolean inverse) {
224   
225    m_Inverse = inverse;
226  }
227
228  /**
229   * Returns the Capabilities of this filter.
230   *
231   * @return            the capabilities of this object
232   * @see               Capabilities
233   */
234  public Capabilities getCapabilities() {
235    Capabilities result = super.getCapabilities();
236    result.disableAll();
237
238    // attributes
239    result.enableAllAttributes();
240    result.enable(Capability.MISSING_VALUES);
241   
242    // class
243    result.enableAllClasses();
244    result.enable(Capability.MISSING_CLASS_VALUES);
245    result.enable(Capability.NO_CLASS);
246   
247    return result;
248  }
249
250  /**
251   * Sets the format of the input instances.
252   *
253   * @param instanceInfo an Instances object containing the input instance
254   * structure (any instances contained in the object are ignored - only the
255   * structure is required).
256   * @return true because outputFormat can be collected immediately
257   * @throws Exception if the input format can't be set successfully
258   */ 
259  public boolean setInputFormat(Instances instanceInfo) throws Exception {
260
261    super.setInputFormat(instanceInfo);
262    setOutputFormat(instanceInfo);
263    return true;
264  }
265 
266  /**
267   * Input an instance for filtering. Ordinarily the instance is processed
268   * and made available for output immediately. Some filters require all
269   * instances be read before producing output.
270   *
271   * @param instance the input instance
272   * @return true if the filtered instance may now be
273   * collected with output().
274   * @throws IllegalStateException if no input format has been set.
275   */
276  public boolean input(Instance instance) {
277     if (getInputFormat() == null) {
278        throw new IllegalStateException("No input instance format defined");
279     }
280     
281     if (m_NewBatch) {
282        resetQueue();
283        m_NewBatch = false;
284     }
285
286     if (isFirstBatchDone()) {
287       push(instance);
288       return true;
289     } 
290     else {
291       bufferInput(instance);
292       return false;
293     }
294  }
295
296  /**
297   * Signify that this batch of input to the filter is
298   * finished. Output() may now be called to retrieve the filtered
299   * instances.
300   *
301   * @return true if there are instances pending output
302   * @throws IllegalStateException if no input structure has been defined
303   */
304  public boolean batchFinished() {
305
306    if (getInputFormat() == null) {
307      throw new IllegalStateException("No input instance format defined");
308    }
309
310    // Push instances for output into output queue
311    Instances toFilter = getInputFormat();
312    int cutOff = (int) Math.round(toFilter.numInstances() * m_Percentage / 100);
313   
314    if (m_Inverse) {
315      for (int i = 0; i < cutOff; i++) {
316        push(toFilter.instance(i));
317      }
318    } else {
319      for (int i = cutOff; i < toFilter.numInstances(); i++) {
320        push(toFilter.instance(i));
321      }
322    }
323    flushInput();
324   
325    m_NewBatch = true;
326    m_FirstBatchDone = true;
327   
328    return (numPendingOutput() != 0);
329  }
330 
331  /**
332   * Returns the revision string.
333   *
334   * @return            the revision
335   */
336  public String getRevision() {
337    return RevisionUtils.extract("$Revision: 5499 $");
338  }
339
340  /**
341   * Main method for testing this class.
342   *
343   * @param argv should contain arguments to the filter: use -h for help
344   */
345  public static void main(String [] argv) {
346    runFilter(new RemovePercentage(), argv);
347  }
348}
Note: See TracBrowser for help on using the repository browser.