source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/instance/RemoveWithValues.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 17.7 KB
Line 
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RemoveWithValues.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */
22
23
24package weka.filters.unsupervised.instance;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.Range;
34import weka.core.RevisionUtils;
35import weka.core.SingleIndex;
36import weka.core.UnsupportedAttributeTypeException;
37import weka.core.Utils;
38import weka.core.Capabilities.Capability;
39import weka.filters.Filter;
40import weka.filters.StreamableFilter;
41import weka.filters.UnsupervisedFilter;
42
43import java.util.Enumeration;
44import java.util.Vector;
45
46/**
47 <!-- globalinfo-start -->
48 * Filters instances according to the value of an attribute.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 <!-- options-start -->
53 * Valid options are: <p/>
54 *
55 * <pre> -C &lt;num&gt;
56 *  Choose attribute to be used for selection.</pre>
57 *
58 * <pre> -S &lt;num&gt;
59 *  Numeric value to be used for selection on numeric
60 *  attribute.
61 *  Instances with values smaller than given value will
62 *  be selected. (default 0)</pre>
63 *
64 * <pre> -L &lt;index1,index2-index4,...&gt;
65 *  Range of label indices to be used for selection on
66 *  nominal attribute.
67 *  First and last are valid indexes. (default all values)</pre>
68 *
69 * <pre> -M
70 *  Missing values count as a match. This setting is
71 *  independent of the -V option.
72 *  (default missing values don't match)</pre>
73 *
74 * <pre> -V
75 *  Invert matching sense.</pre>
76 *
77 * <pre> -H
78 *  When selecting on nominal attributes, removes header
79 *  references to excluded values.</pre>
80 *
81 <!-- options-end -->
82 *
83 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
84 * @version $Revision: 5499 $
85 */
86public class RemoveWithValues 
87  extends Filter
88  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
89
90  /** for serialization */
91  static final long serialVersionUID = 4752870193679263361L;
92 
93  /** The attribute's index setting. */
94  private SingleIndex m_AttIndex = new SingleIndex("last"); 
95 
96  /** Stores which values of nominal attribute are to be used for filtering.*/
97  protected Range m_Values;
98
99  /** Stores which value of a numeric attribute is to be used for filtering.*/
100  protected double m_Value = 0;
101
102  /** True if missing values should count as a match */
103  protected boolean m_MatchMissingValues = false;
104
105  /** Modify header for nominal attributes? */
106  protected boolean m_ModifyHeader = false;
107
108  /** If m_ModifyHeader, stores a mapping from old to new indexes */
109  protected int [] m_NominalMapping;
110
111  /**
112   * Returns a string describing this classifier
113   * @return a description of the classifier suitable for
114   * displaying in the explorer/experimenter gui
115   */
116  public String globalInfo() {
117    return "Filters instances according to the value of an attribute.";
118  }
119
120  /** Default constructor */
121  public RemoveWithValues() {
122
123      m_Values = new Range("first-last");
124      m_Values.setInvert(true);
125  }
126
127  /**
128   * Returns an enumeration describing the available options.
129   *
130   * @return an enumeration of all the available options.
131   */
132  public Enumeration listOptions() {
133
134    Vector newVector = new Vector(5);
135
136    newVector.addElement(new Option(
137              "\tChoose attribute to be used for selection.",
138              "C", 1, "-C <num>"));
139    newVector.addElement(new Option(
140              "\tNumeric value to be used for selection on numeric\n"+
141              "\tattribute.\n"+
142              "\tInstances with values smaller than given value will\n"+
143              "\tbe selected. (default 0)",
144              "S", 1, "-S <num>"));
145    newVector.addElement(new Option(
146              "\tRange of label indices to be used for selection on\n"+
147              "\tnominal attribute.\n"+
148              "\tFirst and last are valid indexes. (default all values)",
149              "L", 1, "-L <index1,index2-index4,...>"));
150    newVector.addElement(new Option(
151              "\tMissing values count as a match. This setting is\n"+
152              "\tindependent of the -V option.\n"+
153              "\t(default missing values don't match)",
154              "M", 0, "-M"));
155    newVector.addElement(new Option(
156              "\tInvert matching sense.",
157              "V", 0, "-V"));
158    newVector.addElement(new Option(
159              "\tWhen selecting on nominal attributes, removes header\n"
160              + "\treferences to excluded values.",
161              "H", 0, "-H"));
162
163    return newVector.elements();
164  }
165
166
167  /**
168   * Parses a given list of options. <p/>
169   *
170   <!-- options-start -->
171   * Valid options are: <p/>
172   *
173   * <pre> -C &lt;num&gt;
174   *  Choose attribute to be used for selection.</pre>
175   *
176   * <pre> -S &lt;num&gt;
177   *  Numeric value to be used for selection on numeric
178   *  attribute.
179   *  Instances with values smaller than given value will
180   *  be selected. (default 0)</pre>
181   *
182   * <pre> -L &lt;index1,index2-index4,...&gt;
183   *  Range of label indices to be used for selection on
184   *  nominal attribute.
185   *  First and last are valid indexes. (default all values)</pre>
186   *
187   * <pre> -M
188   *  Missing values count as a match. This setting is
189   *  independent of the -V option.
190   *  (default missing values don't match)</pre>
191   *
192   * <pre> -V
193   *  Invert matching sense.</pre>
194   *
195   * <pre> -H
196   *  When selecting on nominal attributes, removes header
197   *  references to excluded values.</pre>
198   *
199   <!-- options-end -->
200   *
201   * @param options the list of options as an array of strings
202   * @throws Exception if an option is not supported
203   */
204  public void setOptions(String[] options) throws Exception {
205
206    String attIndex = Utils.getOption('C', options);
207    if (attIndex.length() != 0) {
208      setAttributeIndex(attIndex);
209    } else {
210      setAttributeIndex("last");
211    }
212   
213    String splitPoint = Utils.getOption('S', options);
214    if (splitPoint.length() != 0) {
215      setSplitPoint((new Double(splitPoint)).doubleValue());
216    } else {
217      setSplitPoint(0);
218    }
219
220    String convertList = Utils.getOption('L', options);
221    if (convertList.length() != 0) {
222      setNominalIndices(convertList);
223    } else {
224      setNominalIndices("first-last");
225    }
226    setInvertSelection(Utils.getFlag('V', options));
227    setMatchMissingValues(Utils.getFlag('M', options));
228    setModifyHeader(Utils.getFlag('H', options));
229    // Re-initialize output format according to new options
230   
231    if (getInputFormat() != null) {
232      setInputFormat(getInputFormat());
233    }
234  }
235
236  /**
237   * Gets the current settings of the filter.
238   *
239   * @return an array of strings suitable for passing to setOptions
240   */
241  public String [] getOptions() {
242
243    String [] options = new String [9];
244    int current = 0;
245
246    options[current++] = "-S"; options[current++] = "" + getSplitPoint();
247    options[current++] = "-C";
248    options[current++] = "" + (getAttributeIndex());
249    if (!getNominalIndices().equals("")) {
250      options[current++] = "-L"; options[current++] = getNominalIndices();
251    }
252    if (getInvertSelection()) {
253      options[current++] = "-V";
254    }
255    if (getMatchMissingValues()) {
256      options[current++] = "-M";
257    }
258    if (getModifyHeader()) {
259      options[current++] = "-H";
260    }
261    while (current < options.length) {
262      options[current++] = "";
263    }
264    return options;
265  }
266
267  /**
268   * Returns the Capabilities of this filter.
269   *
270   * @return            the capabilities of this object
271   * @see               Capabilities
272   */
273  public Capabilities getCapabilities() {
274    Capabilities result = super.getCapabilities();
275    result.disableAll();
276
277    // attributes
278    result.enableAllAttributes();
279    result.enable(Capability.MISSING_VALUES);
280   
281    // class
282    result.enableAllClasses();
283    result.enable(Capability.MISSING_CLASS_VALUES);
284    result.enable(Capability.NO_CLASS);
285   
286    return result;
287  }
288
289  /**
290   * Sets the format of the input instances.
291   *
292   * @param instanceInfo an Instances object containing the input instance
293   * structure (any instances contained in the object are ignored - only the
294   * structure is required).
295   * @throws UnsupportedAttributeTypeException if the specified attribute
296   * is neither numeric or nominal.
297   * @return true because outputFormat can be collected immediately
298   */
299  public boolean setInputFormat(Instances instanceInfo) throws Exception {
300
301    super.setInputFormat(instanceInfo);
302
303    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
304    if (!isNumeric() && !isNominal()) {
305      throw new UnsupportedAttributeTypeException("Can only handle numeric " +
306                                                  "or nominal attributes.");
307    }
308    m_Values.setUpper(instanceInfo.attribute(m_AttIndex.getIndex()).numValues() - 1);
309    if (isNominal() && m_ModifyHeader) {
310      instanceInfo = new Instances(instanceInfo, 0); // copy before modifying
311      Attribute oldAtt = instanceInfo.attribute(m_AttIndex.getIndex());
312      int [] selection = m_Values.getSelection();
313      FastVector newVals = new FastVector();
314      for (int i = 0; i < selection.length; i++) {
315        newVals.addElement(oldAtt.value(selection[i]));
316      }
317      instanceInfo.deleteAttributeAt(m_AttIndex.getIndex());
318      instanceInfo.insertAttributeAt(new Attribute(oldAtt.name(), newVals),
319                                      m_AttIndex.getIndex());
320      m_NominalMapping = new int [oldAtt.numValues()];
321      for (int i = 0; i < m_NominalMapping.length; i++) {
322        boolean found = false;
323        for (int j = 0; j < selection.length; j++) {
324          if (selection[j] == i) {
325            m_NominalMapping[i] = j;
326            found = true;
327            break;
328          }
329        }
330        if (!found) {
331          m_NominalMapping[i] = -1;
332        }
333      }
334    }
335    setOutputFormat(instanceInfo);
336    return true;
337  }
338
339  /**
340   * Input an instance for filtering. Ordinarily the instance is processed
341   * and made available for output immediately. Some filters require all
342   * instances be read before producing output.
343   *
344   * @param instance the input instance
345   * @return true if the filtered instance may now be
346   * collected with output().
347   * @throws IllegalStateException if no input format has been set.
348   */
349  public boolean input(Instance instance) {
350
351    if (getInputFormat() == null) {
352      throw new IllegalStateException("No input instance format defined");
353    }
354    if (m_NewBatch) {
355      resetQueue();
356      m_NewBatch = false;
357    }
358    if (instance.isMissing(m_AttIndex.getIndex())) {
359      if (!getMatchMissingValues()) {
360        push((Instance)instance.copy());
361        return true;
362      } else {
363        return false;
364      }
365    }
366    if (isNumeric()) {
367      if (!m_Values.getInvert()) {
368        if (instance.value(m_AttIndex.getIndex()) < m_Value) {
369          push((Instance)instance.copy());
370          return true;
371        } 
372      } else {
373        if (instance.value(m_AttIndex.getIndex()) >= m_Value) {
374          push((Instance)instance.copy());
375          return true;
376        } 
377      }
378    }
379    if (isNominal()) {
380      if (m_Values.isInRange((int)instance.value(m_AttIndex.getIndex()))) {
381        Instance temp = (Instance)instance.copy();
382        if (getModifyHeader()) {
383          temp.setValue(m_AttIndex.getIndex(),
384                        m_NominalMapping[(int)instance.value(m_AttIndex.getIndex())]);
385        }
386        push(temp);
387        return true;
388      }
389    }
390    return false;
391  }
392
393  /**
394   * Returns true if selection attribute is nominal.
395   *
396   * @return true if selection attribute is nominal
397   */
398  public boolean isNominal() {
399   
400    if (getInputFormat() == null) {
401      return false;
402    } else {
403      return getInputFormat().attribute(m_AttIndex.getIndex()).isNominal();
404    }
405  }
406
407  /**
408   * Returns true if selection attribute is numeric.
409   *
410   * @return true if selection attribute is numeric
411   */
412  public boolean isNumeric() {
413   
414    if (getInputFormat() == null) {
415      return false;
416    } else {
417      return getInputFormat().attribute(m_AttIndex.getIndex()).isNumeric();
418    }
419  }
420
421  /**
422   * Returns the tip text for this property
423   * @return tip text for this property suitable for
424   * displaying in the explorer/experimenter gui
425   */
426  public String modifyHeaderTipText() {
427    return "When selecting on nominal attributes, removes header references to "
428      + "excluded values.";
429  }
430
431  /**
432   * Gets whether the header will be modified when selecting on nominal
433   * attributes.
434   *
435   * @return true if so.
436   */
437  public boolean getModifyHeader() {
438   
439    return m_ModifyHeader;
440  }
441 
442  /**
443   * Sets whether the header will be modified when selecting on nominal
444   * attributes.
445   *
446   * @param newModifyHeader true if so.
447   */
448  public void setModifyHeader(boolean newModifyHeader) {
449   
450    m_ModifyHeader = newModifyHeader;
451  }
452
453  /**
454   * Returns the tip text for this property
455   * @return tip text for this property suitable for
456   * displaying in the explorer/experimenter gui
457   */
458  public String attributeIndexTipText() {
459    return "Choose attribute to be used for selection (default last).";
460  }
461
462  /**
463   * Get the index of the attribute used.
464   *
465   * @return the index of the attribute
466   */
467  public String getAttributeIndex() {
468
469    return m_AttIndex.getSingleIndex();
470  }
471
472  /**
473   * Sets index of the attribute used.
474   *
475   * @param attIndex the index of the attribute
476   */
477  public void setAttributeIndex(String attIndex) {
478   
479    m_AttIndex.setSingleIndex(attIndex);
480  }
481
482  /**
483   * Returns the tip text for this property
484   * @return tip text for this property suitable for
485   * displaying in the explorer/experimenter gui
486   */
487  public String splitPointTipText() {
488    return "Numeric value to be used for selection on numeric attribute. "
489     + "Instances with values smaller than given value will be selected.";
490  }
491
492  /**
493   * Get the split point used for numeric selection
494   *
495   * @return the numeric split point
496   */
497  public double getSplitPoint() {
498
499    return m_Value;
500  }
501
502  /**
503   * Split point to be used for selection on numeric attribute.
504   *
505   * @param value the split point
506   */
507  public void setSplitPoint(double value) {
508
509    m_Value = value;
510  }
511
512  /**
513   * Returns the tip text for this property
514   * @return tip text for this property suitable for
515   * displaying in the explorer/experimenter gui
516   */
517  public String matchMissingValuesTipText() {
518    return "Missing values count as a match. This setting is independent of "
519      + "the invertSelection option.";
520  }
521
522  /**
523   * Gets whether missing values are counted as a match.
524   *
525   * @return true if missing values are counted as a match.
526   */
527  public boolean getMatchMissingValues() {
528
529    return m_MatchMissingValues;
530  }
531 
532  /**
533   * Sets whether missing values are counted as a match.
534   *
535   * @param newMatchMissingValues true if missing values are counted as a match.
536   */
537  public void setMatchMissingValues(boolean newMatchMissingValues) {
538
539    m_MatchMissingValues = newMatchMissingValues;
540  }
541 
542  /**
543   * Returns the tip text for this property
544   * @return tip text for this property suitable for
545   * displaying in the explorer/experimenter gui
546   */
547  public String invertSelectionTipText() {
548    return "Invert matching sense.";
549  }
550
551  /**
552   * Get whether the supplied columns are to be removed or kept
553   *
554   * @return true if the supplied columns will be kept
555   */
556  public boolean getInvertSelection() {
557
558    return !m_Values.getInvert();
559  }
560
561  /**
562   * Set whether selected values should be removed or kept. If true the
563   * selected values are kept and unselected values are deleted.
564   *
565   * @param invert the new invert setting
566   */
567  public void setInvertSelection(boolean invert) {
568
569    m_Values.setInvert(!invert);
570  }
571
572  /**
573   * Returns the tip text for this property
574   * @return tip text for this property suitable for
575   * displaying in the explorer/experimenter gui
576   */
577  public String nominalIndicesTipText() {
578    return "Range of label indices to be used for selection on nominal attribute. "
579      +"First and last are valid indexes.";
580  }
581
582  /**
583   * Get the set of nominal value indices that will be used for selection
584   *
585   * @return rangeList a string representing the list of nominal indices.
586   */
587  public String getNominalIndices() {
588
589    return m_Values.getRanges();
590  }
591
592  /**
593   * Set which nominal labels are to be included in the selection.
594   *
595   * @param rangeList a string representing the list of nominal indices.
596   * eg: first-3,5,6-last
597   * @throws InvalidArgumentException if an invalid range list is supplied
598   */
599  public void setNominalIndices(String rangeList) {
600   
601    m_Values.setRanges(rangeList);
602  }
603
604  /**
605   * Set which values of a nominal attribute are to be used for
606   * selection.
607   *
608   * @param values an array containing indexes of values to be
609   * used for selection
610   * @throws InvalidArgumentException if an invalid set of ranges is supplied
611   */
612  public void setNominalIndicesArr(int [] values) {
613
614    String rangeList = "";
615    for(int i = 0; i < values.length; i++) {
616      if (i == 0) {
617        rangeList = "" + (values[i] + 1);
618      } else {
619        rangeList += "," + (values[i] + 1);
620      }
621    }
622    setNominalIndices(rangeList);
623  }
624 
625  /**
626   * Returns the revision string.
627   *
628   * @return            the revision
629   */
630  public String getRevision() {
631    return RevisionUtils.extract("$Revision: 5499 $");
632  }
633
634  /**
635   * Main method for testing this class.
636   *
637   * @param argv should contain arguments to the filter:
638   * use -h for help
639   */
640  public static void main(String [] argv) {
641    runFilter(new RemoveWithValues(), argv);
642  }
643}
Note: See TracBrowser for help on using the repository browser.