source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/MergeTwoValues.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 12.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    MergeTwoValues.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.RevisionUtils;
35import weka.core.SingleIndex;
36import weka.core.UnsupportedAttributeTypeException;
37import weka.core.Utils;
38import weka.core.WekaException;
39import weka.core.Capabilities.Capability;
40import weka.filters.Filter;
41import weka.filters.StreamableFilter;
42import weka.filters.UnsupervisedFilter;
43
44import java.util.Enumeration;
45import java.util.Vector;
46
47/**
48 <!-- globalinfo-start -->
49 * Merges two values of a nominal attribute into one value.
50 * <p/>
51 <!-- globalinfo-end -->
52 *
53 <!-- options-start -->
54 * Valid options are: <p/>
55 *
56 * <pre> -C &lt;col&gt;
57 *  Sets the attribute index (default last).</pre>
58 *
59 * <pre> -F &lt;value index&gt;
60 *  Sets the first value's index (default first).</pre>
61 *
62 * <pre> -S &lt;value index&gt;
63 *  Sets the second value's index (default last).</pre>
64 *
65 <!-- options-end -->
66 *
67 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
68 * @version $Revision: 5987 $
69 */
70public class MergeTwoValues 
71  extends Filter
72  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
73
74  /** for serialization */
75  static final long serialVersionUID = 2925048980504034018L;
76 
77  /** The attribute's index setting. */
78  private SingleIndex m_AttIndex = new SingleIndex("last"); 
79
80  /** The first value's index setting. */
81  private SingleIndex m_FirstIndex = new SingleIndex("first");
82
83  /** The second value's index setting. */
84  private SingleIndex m_SecondIndex = new SingleIndex("last");
85
86  /**
87   * Returns a string describing this filter
88   *
89   * @return a description of the filter suitable for
90   * displaying in the explorer/experimenter gui
91   */
92  public String globalInfo() {
93
94    return  "Merges two values of a nominal attribute into one value.";
95  }
96
97  /**
98   * Returns the Capabilities of this filter.
99   *
100   * @return            the capabilities of this object
101   * @see               Capabilities
102   */
103  public Capabilities getCapabilities() {
104    Capabilities result = super.getCapabilities();
105    result.disableAll();
106
107    // attributes
108    result.enableAllAttributes();
109    result.enable(Capability.MISSING_VALUES);
110   
111    // class
112    result.enableAllClasses();
113    result.enable(Capability.MISSING_CLASS_VALUES);
114    result.enable(Capability.NO_CLASS);
115   
116    return result;
117  }
118
119  /**
120   * Sets the format of the input instances.
121   *
122   * @param instanceInfo an Instances object containing the input
123   * instance structure (any instances contained in the object are
124   * ignored - only the structure is required).
125   * @return true if the outputFormat may be collected immediately
126   * @throws Exception if the input format can't be set
127   * successfully
128   */
129  public boolean setInputFormat(Instances instanceInfo) 
130       throws Exception {
131
132    super.setInputFormat(instanceInfo);
133    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
134    m_FirstIndex.setUpper(instanceInfo.
135                          attribute(m_AttIndex.getIndex()).numValues() - 1);
136    m_SecondIndex.setUpper(instanceInfo.
137                           attribute(m_AttIndex.getIndex()).numValues() - 1);
138    if ((instanceInfo.classIndex() > -1) && (instanceInfo.classIndex() == m_AttIndex.getIndex())) {
139      throw new Exception("Cannot process class attribute.");
140    }
141    if (!instanceInfo.attribute(m_AttIndex.getIndex()).isNominal()) {
142      throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
143    }
144    if (instanceInfo.attribute(m_AttIndex.getIndex()).numValues() < 2) {
145      throw new UnsupportedAttributeTypeException("Chosen attribute has less than " +
146                                                  "two values.");
147    }
148    if (m_SecondIndex.getIndex() <= m_FirstIndex.getIndex()) {
149      // XXX Maybe we should just swap the values??
150      throw new Exception("The second index has to be greater "+
151                          "than the first.");
152    }
153    setOutputFormat();
154    return true;
155  }
156
157  /**
158   * Input an instance for filtering. The instance is processed
159   * and made available for output immediately.
160   *
161   * @param instance the input instance
162   * @return true if the filtered instance may now be
163   * collected with output().
164   * @throws IllegalStateException if no input format has been set.
165   */
166  public boolean input(Instance instance) {
167
168    if (getInputFormat() == null) {
169      throw new IllegalStateException("No input instance format defined");
170    }
171    if (m_NewBatch) {
172      resetQueue();
173      m_NewBatch = false;
174    }
175    Instance newInstance = (Instance)instance.copy();
176    if ((int)newInstance.value(m_AttIndex.getIndex()) == m_SecondIndex.getIndex()) {
177      newInstance.setValue(m_AttIndex.getIndex(), (double)m_FirstIndex.getIndex());
178    }
179    else if ((int)newInstance.value(m_AttIndex.getIndex()) > m_SecondIndex.getIndex()) {
180      newInstance.setValue(m_AttIndex.getIndex(),
181                           newInstance.value(m_AttIndex.getIndex()) - 1);
182    }
183    push(newInstance);
184    return true;
185  }
186
187  /**
188   * Returns an enumeration describing the available options.
189   *
190   * @return an enumeration of all the available options.
191   */
192  public Enumeration listOptions() {
193
194    Vector newVector = new Vector(3);
195
196    newVector.addElement(new Option(
197              "\tSets the attribute index (default last).",
198              "C", 1, "-C <col>"));
199
200    newVector.addElement(new Option(
201              "\tSets the first value's index (default first).",
202              "F", 1, "-F <value index>"));
203
204    newVector.addElement(new Option(
205              "\tSets the second value's index (default last).",
206              "S", 1, "-S <value index>"));
207
208    return newVector.elements();
209  }
210
211
212  /**
213   * Parses a given list of options. <p/>
214   *
215   <!-- options-start -->
216   * Valid options are: <p/>
217   *
218   * <pre> -C &lt;col&gt;
219   *  Sets the attribute index (default last).</pre>
220   *
221   * <pre> -F &lt;value index&gt;
222   *  Sets the first value's index (default first).</pre>
223   *
224   * <pre> -S &lt;value index&gt;
225   *  Sets the second value's index (default last).</pre>
226   *
227   <!-- options-end -->
228   *
229   * @param options the list of options as an array of strings
230   * @throws Exception if an option is not supported
231   */
232  public void setOptions(String[] options) throws Exception {
233   
234    String attIndex = Utils.getOption('C', options);
235    if (attIndex.length() != 0) {
236      setAttributeIndex(attIndex);
237    } else {
238      setAttributeIndex("last");
239    }
240
241    String firstValIndex = Utils.getOption('F', options);
242    if (firstValIndex.length() != 0) {
243      setFirstValueIndex(firstValIndex);
244    } else {
245      setFirstValueIndex("first");
246    }
247
248    String secondValIndex = Utils.getOption('S', options);
249    if (secondValIndex.length() != 0) {
250      setSecondValueIndex(secondValIndex);
251    } else {
252      setSecondValueIndex("last");
253    }
254   
255    if (getInputFormat() != null) {
256      setInputFormat(getInputFormat());
257    }
258  }
259
260  /**
261   * Gets the current settings of the filter.
262   *
263   * @return an array of strings suitable for passing to setOptions
264   */
265  public String [] getOptions() {
266
267    String [] options = new String [6];
268    int current = 0;
269
270    options[current++] = "-C";
271    options[current++] = "" + getAttributeIndex();
272    options[current++] = "-F"; 
273    options[current++] = "" + getFirstValueIndex();
274    options[current++] = "-S"; 
275    options[current++] = "" + getSecondValueIndex();
276    while (current < options.length) {
277      options[current++] = "";
278    }
279    return options;
280  }
281
282  /**
283   * @return tip text for this property suitable for
284   * displaying in the explorer/experimenter gui
285   */
286  public String attributeIndexTipText() {
287
288    return "Sets which attribute to process. This "
289      + "attribute must be nominal (\"first\" and \"last\" are valid values)";
290  }
291
292  /**
293   * Get the index of the attribute used.
294   *
295   * @return the index of the attribute
296   */
297  public String getAttributeIndex() {
298
299    return m_AttIndex.getSingleIndex();
300  }
301
302  /**
303   * Sets index of the attribute used.
304   *
305   * @param attIndex the index of the attribute
306   */
307  public void setAttributeIndex(String attIndex) {
308   
309    m_AttIndex.setSingleIndex(attIndex);
310  }
311
312  /**
313   * @return tip text for this property suitable for
314   * displaying in the explorer/experimenter gui
315   */
316  public String firstValueIndexTipText() {
317
318    return "Sets the first value to be merged. "
319      + "(\"first\" and \"last\" are valid values)";
320  }
321
322  /**
323   * Get the index of the first value used.
324   *
325   * @return the index of the first value
326   */
327  public String getFirstValueIndex() {
328
329    return m_FirstIndex.getSingleIndex();
330  }
331
332  /**
333   * Sets index of the first value used.
334   *
335   * @param firstIndex the index of the first value
336   */
337  public void setFirstValueIndex(String firstIndex) {
338   
339    m_FirstIndex.setSingleIndex(firstIndex);
340  }
341
342  /**
343   * @return tip text for this property suitable for
344   * displaying in the explorer/experimenter gui
345   */
346  public String secondValueIndexTipText() {
347
348    return "Sets the second value to be merged. "
349      + "(\"first\" and \"last\" are valid values)";
350  }
351
352  /**
353   * Get the index of the second value used.
354   *
355   * @return the index of the second value
356   */
357  public String getSecondValueIndex() {
358
359    return m_SecondIndex.getSingleIndex();
360  }
361
362  /**
363   * Sets index of the second value used.
364   *
365   * @param secondIndex the index of the second value
366   */
367  public void setSecondValueIndex(String secondIndex) {
368   
369    m_SecondIndex.setSingleIndex(secondIndex);
370  }
371
372  /**
373   * Set the output format. Takes the current average class values
374   * and m_InputFormat and calls setOutputFormat(Instances)
375   * appropriately.
376   */
377  private void setOutputFormat() {
378   
379    Instances newData;
380    FastVector newAtts, newVals;
381    boolean firstEndsWithPrime = false, 
382      secondEndsWithPrime = false;
383    StringBuffer text = new StringBuffer();
384     
385    // Compute new attributes
386     
387    newAtts = new FastVector(getInputFormat().numAttributes());
388    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
389      Attribute att = getInputFormat().attribute(j);
390      if (j != m_AttIndex.getIndex()) {
391        newAtts.addElement(att.copy());
392      } else {
393         
394        // Compute new value
395         
396        if (att.value(m_FirstIndex.getIndex()).endsWith("'")) {
397          firstEndsWithPrime = true;
398        }
399        if (att.value(m_SecondIndex.getIndex()).endsWith("'")) {
400          secondEndsWithPrime = true;
401        }
402        if (firstEndsWithPrime || secondEndsWithPrime) {
403          text.append("'");
404        }
405        if (firstEndsWithPrime) {
406          text.append(((String)att.value(m_FirstIndex.getIndex())).
407                      substring(1, ((String)att.value(m_FirstIndex.getIndex())).
408                                length() - 1));
409        } else {
410          text.append((String)att.value(m_FirstIndex.getIndex()));
411        }
412        text.append('_');
413        if (secondEndsWithPrime) {
414          text.append(((String)att.value(m_SecondIndex.getIndex())).
415                      substring(1, ((String)att.value(m_SecondIndex.getIndex())).
416                                length() - 1));
417        } else {
418          text.append((String)att.value(m_SecondIndex.getIndex()));
419        }
420        if (firstEndsWithPrime || secondEndsWithPrime) {
421          text.append("'");
422        }
423         
424        // Compute list of attribute values
425         
426        newVals = new FastVector(att.numValues() - 1);
427        for (int i = 0; i < att.numValues(); i++) {
428          if (i == m_FirstIndex.getIndex()) {
429            newVals.addElement(text.toString());
430          } else if (i != m_SecondIndex.getIndex()) {
431            newVals.addElement(att.value(i));
432          }
433        }
434        newAtts.addElement(new Attribute(att.name(), newVals));
435      }
436    }
437     
438    // Construct new header
439     
440    newData = new Instances(getInputFormat().relationName(), newAtts,
441                            0);
442    newData.setClassIndex(getInputFormat().classIndex());
443    setOutputFormat(newData);
444  }
445 
446  /**
447   * Returns the revision string.
448   *
449   * @return            the revision
450   */
451  public String getRevision() {
452    return RevisionUtils.extract("$Revision: 5987 $");
453  }
454 
455  /**
456   * Main method for testing this class.
457   *
458   * @param argv should contain arguments to the filter:
459   * use -h for help
460   */
461  public static void main(String [] argv) {
462    runFilter(new MergeTwoValues(), argv);
463  }
464}
Note: See TracBrowser for help on using the repository browser.