source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/AddValues.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 11.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * AddValues.java
19 * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Attribute;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.RevisionUtils;
34import weka.core.SingleIndex;
35import weka.core.UnsupportedAttributeTypeException;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.filters.Filter;
39import weka.filters.StreamableFilter;
40import weka.filters.UnsupervisedFilter;
41
42import java.util.Collections;
43import java.util.Enumeration;
44import java.util.Vector;
45
46/**
47 <!-- globalinfo-start -->
48 * Adds the labels from the given list to an attribute if they are missing. The labels can also be sorted in an ascending manner. If no labels are provided then only the (optional) sorting applies.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 <!-- options-start -->
53 * Valid options are: <p/>
54 *
55 * <pre> -C &lt;col&gt;
56 *  Sets the attribute index
57 *  (default last).</pre>
58 *
59 * <pre> -L &lt;label1,label2,...&gt;
60 *  Comma-separated list of labels to add.
61 *  (default: none)</pre>
62 *
63 * <pre> -S
64 *  Turns on the sorting of the labels.</pre>
65 *
66 <!-- options-end -->
67 *
68 * Based on code from AddValues.
69 *
70 * @author  FracPete (fracpete at waikato dot ac dot nz)
71 * @version $Revision: 5987 $
72 * @see     AddValues
73 */
74public class AddValues 
75  extends Filter
76  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
77
78  /** for serialization */
79  private static final long serialVersionUID = -8100622241742393656L;
80
81  /** The attribute's index setting. */
82  protected SingleIndex m_AttIndex = new SingleIndex("last"); 
83
84  /** The values to add. */
85  protected Vector m_Labels = new Vector();
86
87  /** Whether to sort the values. */
88  protected boolean m_Sort = false;
89
90  /** the array with the sorted label indices */
91  protected int[] m_SortedIndices;
92 
93  /**
94   * Returns a string describing this filter
95   *
96   * @return            a description of the filter suitable for
97   *                    displaying in the explorer/experimenter gui
98   */
99  public String globalInfo() {
100    return 
101        "Adds the labels from the given list to an attribute if they are "
102      + "missing. The labels can also be sorted in an ascending manner. "
103      + "If no labels are provided then only the (optional) sorting applies.";
104  }
105
106  /**
107   * Returns an enumeration describing the available options.
108   *
109   * @return an enumeration of all the available options.
110   */
111  public Enumeration listOptions() {
112    Vector      result;
113   
114    result = new Vector();
115
116    result.addElement(new Option(
117        "\tSets the attribute index\n"
118        + "\t(default last).",
119        "C", 1, "-C <col>"));
120
121    result.addElement(new Option(
122        "\tComma-separated list of labels to add.\n"
123        + "\t(default: none)",
124        "L", 1, "-L <label1,label2,...>"));
125
126    result.addElement(new Option(
127        "\tTurns on the sorting of the labels.",
128        "S", 0, "-S"));
129
130    return result.elements();
131  }
132
133
134  /**
135   * Parses a given list of options. <p/>
136   *
137   <!-- options-start -->
138   * Valid options are: <p/>
139   *
140   * <pre> -C &lt;col&gt;
141   *  Sets the attribute index
142   *  (default last).</pre>
143   *
144   * <pre> -L &lt;label1,label2,...&gt;
145   *  Comma-separated list of labels to add.
146   *  (default: none)</pre>
147   *
148   * <pre> -S
149   *  Turns on the sorting of the labels.</pre>
150   *
151   <!-- options-end -->
152   *
153   * @param options the list of options as an array of strings
154   * @throws Exception if an option is not supported
155   */
156  public void setOptions(String[] options) throws Exception {
157    String      tmpStr;
158   
159    tmpStr = Utils.getOption('C', options);
160    if (tmpStr.length() != 0)
161      setAttributeIndex(tmpStr);
162    else
163      setAttributeIndex("last");
164
165    tmpStr = Utils.getOption('L', options);
166    if (tmpStr.length() != 0)
167      setLabels(tmpStr);
168    else
169      setLabels("");
170
171    setSort(Utils.getFlag('S', options));
172   
173    if (getInputFormat() != null)
174      setInputFormat(getInputFormat());
175  }
176
177  /**
178   * Gets the current settings of the filter.
179   *
180   * @return an array of strings suitable for passing to setOptions
181   */
182  public String[] getOptions() {
183    Vector      result;
184   
185    result = new Vector();
186
187    result.add("-C");
188    result.add("" + getAttributeIndex());
189   
190    result.add("-L");
191    result.add("" + getLabels());
192   
193    if (getSort())
194      result.add("-S");
195
196    return (String[]) result.toArray(new String[result.size()]);
197  }
198
199  /**
200   * Returns the Capabilities of this filter.
201   *
202   * @return            the capabilities of this object
203   * @see               Capabilities
204   */
205  public Capabilities getCapabilities() {
206    Capabilities result = super.getCapabilities();
207    result.disableAll();
208
209    // attributes
210    result.enableAllAttributes();
211    result.enable(Capability.MISSING_VALUES);
212   
213    // class
214    result.enableAllClasses();
215    result.enable(Capability.MISSING_CLASS_VALUES);
216    result.enable(Capability.NO_CLASS);
217   
218    return result;
219  }
220
221  /**
222   * Sets the format of the input instances.
223   *
224   * @param instanceInfo        an Instances object containing the input
225   *                            instance structure (any instances contained
226   *                            in the object are ignored - only the
227   *                            structure is required).
228   * @return                    true if the outputFormat may be collected
229   *                            immediately
230   * @throws Exception          if the input format can't be set successfully
231   */
232  public boolean setInputFormat(Instances instanceInfo) throws Exception {
233    Attribute   att;
234    Attribute   attNew;
235    Vector      allLabels;
236    Enumeration enm;
237    int         i;
238    FastVector  values;
239    FastVector  atts;
240    Instances   instNew;
241
242    super.setInputFormat(instanceInfo);
243   
244    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
245    att = instanceInfo.attribute(m_AttIndex.getIndex());
246    if (!att.isNominal())
247      throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
248   
249    // merge labels
250    allLabels = new Vector();
251    enm = att.enumerateValues();
252    while (enm.hasMoreElements())
253      allLabels.add(enm.nextElement());
254    for (i = 0; i < m_Labels.size(); i++) {
255      if (!allLabels.contains(m_Labels.get(i)))
256        allLabels.add(m_Labels.get(i));
257    }
258   
259    // generate index array
260    if (getSort())
261      Collections.sort(allLabels);
262    m_SortedIndices = new int[att.numValues()];
263    enm             = att.enumerateValues();
264    i               = 0;
265    while (enm.hasMoreElements()) {
266      m_SortedIndices[i] = allLabels.indexOf(enm.nextElement());
267      i++;
268    }
269   
270    // generate new header
271    values = new FastVector();
272    for (i = 0; i < allLabels.size(); i++)
273      values.addElement(allLabels.get(i));
274    attNew = new Attribute(att.name(), values);
275
276    atts = new FastVector();
277    for (i = 0; i < instanceInfo.numAttributes(); i++) {
278      if (i == m_AttIndex.getIndex())
279        atts.addElement(attNew);
280      else
281        atts.addElement(instanceInfo.attribute(i));
282    }
283   
284    instNew = new Instances(instanceInfo.relationName(), atts, 0);
285    instNew.setClassIndex(instanceInfo.classIndex());
286   
287    // set new format
288    setOutputFormat(instNew);
289   
290    return true;
291  }
292
293  /**
294   * Input an instance for filtering. The instance is processed
295   * and made available for output immediately.
296   *
297   * @param instance    the input instance
298   * @return            true if the filtered instance may now be
299   *                    collected with output().
300   * @throws IllegalStateException if no input format has been set.
301   */
302  public boolean input(Instance instance) {
303    Instance    newInstance;
304    double[]    values;
305
306    if (getInputFormat() == null)
307      throw new IllegalStateException("No input instance format defined");
308
309    if (m_NewBatch) {
310      resetQueue();
311      m_NewBatch = false;
312    }
313   
314    // generate new Instance
315    values = instance.toDoubleArray();
316    values[m_AttIndex.getIndex()] = m_SortedIndices[(int) values[m_AttIndex.getIndex()]];
317    newInstance = new DenseInstance(instance.weight(), values);
318
319    // copy string values etc. from input to output
320    copyValues(instance, false, instance.dataset(), getOutputFormat());
321   
322    push(newInstance);
323   
324    return true;
325  }
326
327  /**
328   * Returns the tip text for this property
329   *
330   * @return            tip text for this property suitable for
331   *                    displaying in the explorer/experimenter gui
332   */
333  public String attributeIndexTipText() {
334    return "Sets which attribute to process. This "
335      + "attribute must be nominal (\"first\" and \"last\" are valid values)";
336  }
337
338  /**
339   * Get the index of the attribute used.
340   *
341   * @return            the index of the attribute
342   */
343  public String getAttributeIndex() {
344    return m_AttIndex.getSingleIndex();
345  }
346
347  /**
348   * Sets index of the attribute used.
349   *
350   * @param attIndex    the index of the attribute
351   */
352  public void setAttributeIndex(String attIndex) {
353    m_AttIndex.setSingleIndex(attIndex);
354  }
355
356  /**
357   * Returns the tip text for this property
358   *
359   * @return            tip text for this property suitable for
360   *                    displaying in the explorer/experimenter gui
361   */
362  public String labelsTipText() {
363    return "Comma-separated list of lables to add.";
364  }
365
366  /**
367   * Get the comma-separated list of labels that are added.
368   *
369   * @return            the list of labels
370   */
371  public String getLabels() {
372    String      result;
373    int         i;
374
375    result = "";
376    for (i = 0; i < m_Labels.size(); i++) {
377      if (i > 0)
378        result += ",";
379      result += Utils.quote((String) m_Labels.get(i));
380    }
381   
382    return result;
383  }
384
385  /**
386   * Sets the comma-separated list of labels.
387   *
388   * @param value       the list
389   */
390  public void setLabels(String value) {
391    int         i;
392    String      label;
393    boolean     quoted;
394    boolean     add;
395   
396    m_Labels.clear();
397   
398    label  = "";
399    quoted = false;
400    add    = false;
401   
402    for (i = 0; i < value.length(); i++) {
403      // quotes?
404      if (value.charAt(i) == '"') {
405        quoted = !quoted;
406        if (!quoted)
407          add = true;
408      }
409      // comma
410      else if ( (value.charAt(i) == ',') && (!quoted) ) {
411        add = true;
412      }
413      // normal character
414      else {
415        label += value.charAt(i);
416        // last character?
417        if (i == value.length() - 1)
418          add = true;
419      }
420     
421      if (add) {
422        if (label.length() != 0)
423          m_Labels.add(label);
424        label = "";
425        add   = false;
426      }
427    }
428  }
429
430  /**
431   * Returns the tip text for this property
432   *
433   * @return            tip text for this property suitable for
434   *                    displaying in the explorer/experimenter gui
435   */
436  public String sortTipText() {
437    return "Whether to sort the labels alphabetically.";
438  }
439
440  /**
441   * Gets whether the labels are sorted or not.
442   *
443   * @return            true if the labels are sorted
444   */
445  public boolean getSort() {
446    return m_Sort;
447  }
448
449  /**
450   * Sets whether the labels are sorted.
451   *
452   * @param value       if true the labels are sorted
453   */
454  public void setSort(boolean value) {
455    m_Sort = value;
456  }
457 
458  /**
459   * Returns the revision string.
460   *
461   * @return            the revision
462   */
463  public String getRevision() {
464    return RevisionUtils.extract("$Revision: 5987 $");
465  }
466 
467  /**
468   * Main method for testing and running this class.
469   *
470   * @param args        should contain arguments to the filter:
471   *                    use -h for help
472   */
473  public static void main(String[] args) {
474    runFilter(new AddValues(), args);
475  }
476}
Note: See TracBrowser for help on using the repository browser.