source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/StringToNominal.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 9.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    StringToNominal.java
19 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.Range;
35import weka.core.RevisionUtils;
36import weka.core.UnsupportedAttributeTypeException;
37import weka.core.Utils;
38import weka.core.Capabilities.Capability;
39import weka.filters.Filter;
40import weka.filters.UnsupervisedFilter;
41
42import java.util.Enumeration;
43import java.util.Vector;
44
45/**
46 <!-- globalinfo-start -->
47 * Converts a string attribute (i.e. unspecified number of values) to nominal (i.e. set number of values). You should ensure that all string values that will appear are represented in the first batch of the data.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 <!-- options-start -->
52 * Valid options are: <p/>
53 *
54 * <pre> -R &lt;col&gt;
55 *  Sets the range of attribute indices (default last).</pre>
56 *
57 <!-- options-end -->
58 *
59 * @author Len Trigg (len@reeltwo.com)
60 * @version $Revision: 5987 $
61 */
62public class StringToNominal 
63  extends Filter
64  implements UnsupervisedFilter, OptionHandler {
65
66  /** for serialization */
67        private static final long serialVersionUID = 4864084427902797605L;
68       
69/** The attribute's range indices setting. */
70  private Range m_AttIndices = new Range("last"); 
71
72  /**
73   * Returns a string describing this filter
74   *
75   * @return a description of the filter suitable for
76   * displaying in the explorer/experimenter gui
77   */
78  public String globalInfo() {
79
80    return "Converts a range of string attributes (unspecified number of values) to nominal "
81      + "(set number of values). You should ensure that all string values that "
82      + "will appear are represented in the first batch of the data.";
83  }
84
85  /**
86   * Returns the Capabilities of this filter.
87   *
88   * @return            the capabilities of this object
89   * @see               Capabilities
90   */
91  public Capabilities getCapabilities() {
92    Capabilities result = super.getCapabilities();
93    result.disableAll();
94
95    // attributes
96    result.enableAllAttributes();
97    result.enable(Capability.MISSING_VALUES);
98   
99    // class
100    result.enableAllClasses();
101    result.enable(Capability.MISSING_CLASS_VALUES);
102    result.enable(Capability.NO_CLASS);
103   
104    return result;
105  }
106
107  /**
108   * Sets the format of the input instances.
109   *
110   * @param instanceInfo an Instances object containing the input
111   * instance structure (any instances contained in the object are
112   * ignored - only the structure is required).
113   * @return true if the outputFormat may be collected immediately.
114   * @throws UnsupportedAttributeTypeException if the selected attribute
115   * a string attribute.
116   * @throws Exception if the input format can't be set
117   * successfully.
118   */
119  public boolean setInputFormat(Instances instanceInfo) 
120       throws Exception {
121
122    super.setInputFormat(instanceInfo);
123    m_AttIndices.setUpper(instanceInfo.numAttributes() - 1);
124    return false;
125  }
126
127  /**
128   * Input an instance for filtering. The instance is processed
129   * and made available for output immediately.
130   *
131   * @param instance the input instance.
132   * @return true if the filtered instance may now be
133   * collected with output().
134   * @throws IllegalStateException if no input structure has been defined.
135   */
136  public boolean input(Instance instance) {
137
138    if (getInputFormat() == null) {
139      throw new IllegalStateException("No input instance format defined");
140    }
141    if (m_NewBatch) {
142      resetQueue();
143      m_NewBatch = false;
144    }
145
146    if (isOutputFormatDefined()) {
147      Instance newInstance = (Instance)instance.copy();
148      push(newInstance);
149      return true;
150    }
151
152    bufferInput(instance);
153    return false;
154  }
155
156
157  /**
158   * Signifies that this batch of input to the filter is finished. If the
159   * filter requires all instances prior to filtering, output() may now
160   * be called to retrieve the filtered instances.
161   *
162   * @return true if there are instances pending output.
163   * @throws IllegalStateException if no input structure has been defined.
164   */
165  public boolean batchFinished() {
166
167    if (getInputFormat() == null) {
168      throw new IllegalStateException("No input instance format defined");
169    }
170    if (!isOutputFormatDefined()) {
171
172      setOutputFormat();
173
174      // Convert pending input instances
175      for(int i = 0; i < getInputFormat().numInstances(); i++) {
176        push((Instance) getInputFormat().instance(i).copy());
177      }
178    } 
179
180    flushInput();
181    m_NewBatch = true;
182    return (numPendingOutput() != 0);
183  }
184
185
186  /**
187   * Returns an enumeration describing the available options.
188   *
189   * @return an enumeration of all the available options.
190   */
191  public Enumeration<Option> listOptions() {
192
193    Vector<Option> newVector = new Vector<Option>(1);
194
195    newVector.addElement(new Option(
196              "\tSets the range of attribute indices (default last).",
197              "R", 1, "-R <col>"));
198   
199    newVector.addElement(new Option(
200            "\tInvert the range specified by -R.",
201            "V", 1, "-V <col>"));
202
203    return newVector.elements();
204  }
205
206
207  /**
208   * Parses a given list of options. <p/>
209   *
210   <!-- options-start -->
211   * Valid options are: <p/>
212   *
213   * <pre> -R &lt;col&gt;
214   *  Sets the range of attribute indices (default last).</pre>
215   * 
216   * <pre> -V &lt;col&gt;
217   *  Inverts the selection specified by -R.</pre>
218   *
219   <!-- options-end -->
220   *
221   * @param options the list of options as an array of strings
222   * @throws Exception if an option is not supported
223   */
224  public void setOptions(String[] options) throws Exception {
225   
226    String attIndices = Utils.getOption('R', options);
227    if (attIndices.length() != 0) {
228      setAttributeRange(attIndices);
229    } else {
230      setAttributeRange("last");
231    }
232   
233    String invertSelection = Utils.getOption('V', options);
234    if (invertSelection.length() != 0) {
235      m_AttIndices.setInvert(true);
236    } else {
237        m_AttIndices.setInvert(false);
238    }
239       
240    if (getInputFormat() != null) {
241      setInputFormat(getInputFormat());
242    }
243  }
244
245  /**
246   * Gets the current settings of the filter.
247   *
248   * @return an array of strings suitable for passing to setOptions
249   */
250  public String [] getOptions() {
251
252    String [] options = new String [this.m_AttIndices.getInvert() ? 7 : 6];
253    int current = 0;
254
255    options[current++] = "-R";
256    options[current++] = "" + (getAttributeRange());
257   
258   
259    while (current < options.length) {
260      options[current++] = "";
261    }
262   
263    if(this.m_AttIndices.getInvert()) {
264        options[current++] = "-V";
265    }
266   
267    return options;
268  }
269
270  /**
271   * @return tip text for this property suitable for
272   * displaying in the explorer/experimenter gui
273   */
274  public String attributeRangeTipText() {
275
276    return "Sets which attributes to process. This attributes "
277      + "must be string attributes (\"first\" and \"last\" are valid values " +
278                "as well as ranges and lists)";
279  }
280
281  /**
282   * Get the range of indices of the attributes used.
283   *
284   * @return the index of the attribute
285   */
286  public String getAttributeRange() {
287
288    return m_AttIndices.getRanges();
289  }
290
291  /**
292   * Sets range of indices of the attributes used.
293   *
294   * @param rangeList the list of attribute indices
295   */
296  public void setAttributeRange(String rangeList) {
297   
298    m_AttIndices.setRanges(rangeList);
299  }
300
301  /**
302   * Set the output format. Takes the current average class values
303   * and m_InputFormat and calls setOutputFormat(Instances)
304   * appropriately.
305   */
306  private void setOutputFormat() {
307   
308    Instances newData;
309    FastVector newAtts, newVals;
310     
311    // Compute new attributes
312     
313    newAtts = new FastVector(getInputFormat().numAttributes());
314    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
315      Attribute att = getInputFormat().attribute(j);
316      if(!m_AttIndices.isInRange(j) || !att.isString()) {
317
318        // We don't have to copy the attribute because the
319        // attribute index remains unchanged.
320        newAtts.addElement(att); 
321      } else {
322         
323        // Compute list of attribute values
324        newVals = new FastVector(att.numValues());
325        for (int i = 0; i < att.numValues(); i++) {
326          newVals.addElement(att.value(i)); 
327        }
328        newAtts.addElement(new Attribute(att.name(), newVals));
329      }
330    }
331     
332    // Construct new header
333    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
334    newData.setClassIndex(getInputFormat().classIndex());
335    setOutputFormat(newData);
336  }
337 
338  /**
339   * Returns the revision string.
340   *
341   * @return            the revision
342   */
343  public String getRevision() {
344    return RevisionUtils.extract("$Revision: 5987 $");
345  }
346 
347  /**
348   * Main method for testing this class.
349   *
350   * @param argv should contain arguments to the filter:
351   * use -h for help
352   */
353  public static void main(String [] argv) {
354    runFilter(new StringToNominal(), argv);
355  }
356}
Note: See TracBrowser for help on using the repository browser.