source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/MakeIndicator.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 12.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    MakeIndicator.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Attribute;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.Range;
34import weka.core.RevisionUtils;
35import weka.core.SingleIndex;
36import weka.core.UnsupportedAttributeTypeException;
37import weka.core.Utils;
38import weka.core.Capabilities.Capability;
39import weka.filters.Filter;
40import weka.filters.StreamableFilter;
41import weka.filters.UnsupervisedFilter;
42
43import java.util.Enumeration;
44import java.util.Vector;
45
46/**
47 <!-- globalinfo-start -->
48 * A filter that creates a new dataset with a boolean attribute replacing a nominal attribute.  In the new dataset, a value of 1 is assigned to an instance that exhibits a particular range of attribute values, a 0 to an instance that doesn't. The boolean attribute is coded as numeric by default.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 <!-- options-start -->
53 * Valid options are: <p/>
54 *
55 * <pre> -C &lt;col&gt;
56 *  Sets the attribute index.</pre>
57 *
58 * <pre> -V &lt;index1,index2-index4,...&gt;
59 *  Specify the list of values to indicate. First and last are
60 *  valid indexes (default last)</pre>
61 *
62 * <pre> -N &lt;index&gt;
63 *  Set if new boolean attribute nominal.</pre>
64 *
65 <!-- options-end -->
66 *
67 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
68 * @version $Revision: 5987 $
69 */
70public class MakeIndicator 
71  extends Filter
72  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
73
74  /** for serialization */
75  static final long serialVersionUID = 766001176862773163L;
76 
77  /** The attribute's index setting. */
78  private SingleIndex m_AttIndex = new SingleIndex("last"); 
79
80  /** The value's index */
81  private Range m_ValIndex;
82 
83  /** Make boolean attribute numeric. */
84  private boolean m_Numeric = true;
85
86  /**
87   * Constructor
88   */
89  public MakeIndicator() {
90
91      m_ValIndex = new Range("last");
92  }
93
94  /**
95   * Returns the Capabilities of this filter.
96   *
97   * @return            the capabilities of this object
98   * @see               Capabilities
99   */
100  public Capabilities getCapabilities() {
101    Capabilities result = super.getCapabilities();
102    result.disableAll();
103
104    // attributes
105    result.enableAllAttributes();
106    result.enable(Capability.MISSING_VALUES);
107   
108    // class
109    result.enableAllClasses();
110    result.enable(Capability.MISSING_CLASS_VALUES);
111    result.enable(Capability.NO_CLASS);
112   
113    return result;
114  }
115
116  /**
117   * Sets the format of the input instances.
118   *
119   * @param instanceInfo an Instances object containing the input
120   * instance structure (any instances contained in the object are
121   * ignored - only the structure is required).
122   * @return true if the outputFormat may be collected immediately
123   * @throws UnsupportedAttributeTypeException the selecte attribute is not nominal
124   * @throws UnsupportedAttributeTypeException the selecte attribute has fewer than two values.
125   */
126  public boolean setInputFormat(Instances instanceInfo) 
127       throws Exception {
128
129    super.setInputFormat(instanceInfo);
130    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
131    m_ValIndex.setUpper(instanceInfo.attribute(m_AttIndex.
132                                               getIndex()).numValues() - 1);
133    if (!instanceInfo.attribute(m_AttIndex.getIndex()).isNominal()) {
134      throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
135    }
136    if (instanceInfo.attribute(m_AttIndex.getIndex()).numValues() < 2) {
137      throw new UnsupportedAttributeTypeException("Chosen attribute has less " +
138                                                  "than two values.");
139    }
140    setOutputFormat();
141    return true;
142  }
143
144  /**
145   * Input an instance for filtering. The instance is processed
146   * and made available for output immediately.
147   *
148   * @param instance the input instance
149   * @return true if the filtered instance may now be
150   * collected with output().
151   * @throws IllegalStateException if no input format has been set.
152   */
153  public boolean input(Instance instance) {
154
155    if (getInputFormat() == null) {
156      throw new IllegalStateException("No input instance format defined");
157    }
158    if (m_NewBatch) {
159      resetQueue();
160      m_NewBatch = false;
161    }
162    Instance newInstance = (Instance)instance.copy();
163    if (!newInstance.isMissing(m_AttIndex.getIndex())) {
164      if (m_ValIndex.isInRange((int)newInstance.value(m_AttIndex.getIndex()))) {
165        newInstance.setValue(m_AttIndex.getIndex(), 1);
166      } else {
167        newInstance.setValue(m_AttIndex.getIndex(), 0);
168      }
169    }
170    push(newInstance);
171    return true;
172  }
173
174  /**
175   * Returns an enumeration describing the available options.
176   *
177   * @return an enumeration of all the available options.
178   */
179  public Enumeration listOptions() {
180
181    Vector newVector = new Vector(3);
182
183    newVector.addElement(new Option(
184              "\tSets the attribute index.",
185              "C", 1, "-C <col>"));
186
187    newVector.addElement(new Option(
188              "\tSpecify the list of values to indicate. First and last are\n"+
189              "\tvalid indexes (default last)",
190              "V", 1, "-V <index1,index2-index4,...>"));
191    newVector.addElement(new Option(
192              "\tSet if new boolean attribute nominal.",
193              "N", 0, "-N <index>"));
194
195    return newVector.elements();
196  }
197
198
199  /**
200   * Parses a given list of options. <p/>
201   *
202   <!-- options-start -->
203   * Valid options are: <p/>
204   *
205   * <pre> -C &lt;col&gt;
206   *  Sets the attribute index.</pre>
207   *
208   * <pre> -V &lt;index1,index2-index4,...&gt;
209   *  Specify the list of values to indicate. First and last are
210   *  valid indexes (default last)</pre>
211   *
212   * <pre> -N &lt;index&gt;
213   *  Set if new boolean attribute nominal.</pre>
214   *
215   <!-- options-end -->
216   *
217   * @param options the list of options as an array of strings
218   * @throws Exception if an option is not supported
219   */
220  public void setOptions(String[] options) throws Exception {
221   
222    String attIndex = Utils.getOption('C', options);
223    if (attIndex.length() != 0) {
224      setAttributeIndex(attIndex);
225    } else {
226      setAttributeIndex("last");
227    }
228
229    String valIndex = Utils.getOption('V', options);
230    if (valIndex.length() != 0) {
231      setValueIndices(valIndex);
232    } else {
233      setValueIndices("last");
234    }
235
236    setNumeric(!Utils.getFlag('N', options));
237
238    if (getInputFormat() != null) {
239      setInputFormat(getInputFormat());
240    }
241  }
242
243  /**
244   * Gets the current settings of the filter.
245   *
246   * @return an array of strings suitable for passing to setOptions
247   */
248  public String [] getOptions() {
249
250    String [] options = new String [5];
251    int current = 0;
252
253    options[current++] = "-C";
254    options[current++] = "" + (getAttributeIndex());
255    options[current++] = "-V"; 
256    options[current++] = getValueIndices();
257    if (!getNumeric()) {
258      options[current++] = "-N"; 
259    }
260    while (current < options.length) {
261      options[current++] = "";
262    }
263    return options;
264  }
265
266  /**
267   * @return a description of the filter suitable for
268   * displaying in the explorer/experimenter gui
269   */
270  public String globalInfo() {
271
272    return "A filter that creates a new dataset with a boolean attribute "
273      + "replacing a nominal attribute.  In the new dataset, a value of 1 is "
274      + "assigned to an instance that exhibits a particular range of attribute "
275      + "values, a 0 to an instance that doesn't. The boolean attribute is "
276      + "coded as numeric by default.";
277  }
278
279  /**
280   * @return tip text for this property suitable for
281   * displaying in the explorer/experimenter gui
282   */
283  public String attributeIndexTipText() {
284
285    return "Sets which attribute should be replaced by the indicator. This "
286      + "attribute must be nominal.";
287  }
288
289  /**
290   * Get the index of the attribute used.
291   *
292   * @return the index of the attribute
293   */
294  public String getAttributeIndex() {
295
296    return m_AttIndex.getSingleIndex();
297  }
298
299  /**
300   * Sets index of the attribute used.
301   *
302   * @param attIndex the index of the attribute
303   */
304  public void setAttributeIndex(String attIndex) {
305   
306    m_AttIndex.setSingleIndex(attIndex);
307  }
308
309  /**
310   * Get the range containing the indicator values.
311   *
312   * @return the range containing the indicator values
313   */
314  public Range getValueRange() {
315
316    return m_ValIndex;
317  }
318
319  /**
320   * @return tip text for this property suitable for
321   * displaying in the explorer/experimenter gui
322   */
323  public String valueIndicesTipText() {
324
325    return "Specify range of nominal values to act on."
326      + " This is a comma separated list of attribute indices (numbered from"
327      + " 1), with \"first\" and \"last\" valid values. Specify an inclusive"
328      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
329  }
330
331  /**
332   * Get the indices of the indicator values.
333   *
334   * @return the indices of the indicator values
335   */
336  public String getValueIndices() {
337
338    return m_ValIndex.getRanges();
339  }
340
341  /**
342   * Sets indices of the indicator values.
343   *
344   * @param range the string representation of the indicator value indices
345   * @see Range
346   */
347  public void setValueIndices(String range) {
348   
349    m_ValIndex.setRanges(range);
350  }
351
352  /**
353   * Sets index of the indicator value.
354   *
355   * @param index the index of the indicator value
356   */
357  public void setValueIndex(int index) {
358
359    setValueIndices("" +  (index + 1));
360  }
361
362  /**
363   * Set which attributes are to be deleted (or kept if invert is true)
364   *
365   * @param indices an array containing indexes of attributes to select.
366   * Since the array will typically come from a program, attributes are indexed
367   * from 0.
368   * @throws InvalidArgumentException if an invalid set of ranges is supplied
369   */
370  public void setValueIndicesArray(int [] indices) {
371   
372    setValueIndices(Range.indicesToRangeList(indices));
373  }
374
375  /**
376   * @return tip text for this property suitable for
377   * displaying in the explorer/experimenter gui
378   */
379  public String numericTipText() {
380
381    return "Determines whether the output indicator attribute is numeric. If "
382      + "this is set to false, the output attribute will be nominal.";
383  }
384
385  /**
386   * Sets if the new Attribute is to be numeric.
387   *
388   * @param bool true if new Attribute is to be numeric
389   */
390  public void setNumeric(boolean bool) {
391
392    m_Numeric = bool;
393  }
394
395  /**
396   * Check if new attribute is to be numeric.
397   *
398   * @return true if new attribute is to be numeric
399   */
400  public boolean getNumeric() {
401
402    return m_Numeric;
403  }
404
405  /**
406   * Set the output format.
407   */
408  private void setOutputFormat() {
409   
410    Instances newData;
411    FastVector newAtts, newVals;
412     
413    // Compute new attributes
414   
415    newAtts = new FastVector(getInputFormat().numAttributes());
416    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
417      Attribute att = getInputFormat().attribute(j);
418      if (j != m_AttIndex.getIndex()) {
419
420        // We don't have to copy the attribute because the
421        // attribute index remains unchanged.
422        newAtts.addElement(att);
423      } else {
424        if (m_Numeric) {
425          newAtts.addElement(new Attribute(att.name()));
426        } else {
427          String vals;
428          int [] sel = m_ValIndex.getSelection();
429          if (sel.length == 1) {
430            vals = att.value(sel[0]);
431          } else {
432            vals = m_ValIndex.getRanges().replace(',','_');
433          }
434          newVals = new FastVector(2);
435          newVals.addElement("neg_" + vals);
436          newVals.addElement("pos_" + vals);
437          newAtts.addElement(new Attribute(att.name(), newVals));
438        }
439      }
440    }
441
442    // Construct new header
443    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
444    newData.setClassIndex(getInputFormat().classIndex());
445    setOutputFormat(newData);
446  }
447 
448  /**
449   * Returns the revision string.
450   *
451   * @return            the revision
452   */
453  public String getRevision() {
454    return RevisionUtils.extract("$Revision: 5987 $");
455  }
456 
457  /**
458   * Main method for testing this class.
459   *
460   * @param argv should contain arguments to the filter:
461   * use -h for help
462   */
463  public static void main(String [] argv) {
464    runFilter(new MakeIndicator(), argv);
465  }
466}
Note: See TracBrowser for help on using the repository browser.