source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/Reorder.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 13.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Reorder.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.Range;
35import weka.core.RevisionUtils;
36import weka.core.SparseInstance;
37import weka.core.Utils;
38import weka.core.Capabilities.Capability;
39import weka.filters.Filter;
40import weka.filters.StreamableFilter;
41import weka.filters.UnsupervisedFilter;
42
43import java.util.Enumeration;
44import java.util.StringTokenizer;
45import java.util.Vector;
46
47/**
48 <!-- globalinfo-start -->
49 * An instance filter that generates output with a new order of the attributes. Useful if one wants to move an attribute to the end to use it as class attribute (e.g. with using "-R 2-last,1").<br/>
50 * But it's not only possible to change the order of all the attributes, but also to leave out attributes. E.g. if you have 10 attributes, you can generate the following output order: 1,3,5,7,9,10 or 10,1-5.<br/>
51 * You can also duplicate attributes, e.g. for further processing later on: e.g. 1,1,1,4,4,4,2,2,2 where the second and the third column of each attribute are processed differently and the first one, i.e. the original one is kept.<br/>
52 * One can simply inverse the order of the attributes via 'last-first'.<br/>
53 * After applying the filter, the index of the class attribute is the last attribute.
54 * <p/>
55 <!-- globalinfo-end -->
56 *
57 <!-- options-start -->
58 * Valid options are: <p/>
59 *
60 * <pre> -R &lt;index1,index2-index4,...&gt;
61 *  Specify list of columns to copy. First and last are valid
62 *  indexes. (default first-last)</pre>
63 *
64 <!-- options-end -->
65 *
66 * @author FracPete (fracpete at waikato dot ac dot nz)
67 * @version $Revision: 5987 $
68 */
69public class Reorder 
70  extends Filter
71  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
72 
73  /** Serialization version identifier of this class. */
74  static final long serialVersionUID = -1135571321097202292L;
75
76  /** The attribute order specification; "first-last" until setAttributeIndices() replaces it. */
77  protected String m_NewOrderCols = "first-last";
78
79  /**
80   * Stores the input attribute indices in output order; assigned in
81   * setInputFormat() from the result of determineIndices().
82   */
83  protected int[] m_SelectedAttributes;
84
85  /**
86   * Contains an index of string attributes in the input format that survive
87   * the filtering process. NOTE(review): never read or written in the visible code.
88   */
89  protected int[] m_InputStringIndex;
90
91  /**
92   * Returns an enumeration describing the available options.
93   *
94   * @return an enumeration of all the available options.
95   */
96  public Enumeration listOptions() {
97    Vector newVector = new Vector();
98
99    newVector.addElement(new Option(
100              "\tSpecify list of columns to copy. First and last are valid\n"
101              +"\tindexes. (default first-last)",
102              "R", 1, "-R <index1,index2-index4,...>"));
103
104    return newVector.elements();
105  }
106
107  /**
108   * Parses a given list of options. <p/>
109   *
110   <!-- options-start -->
111   * Valid options are: <p/>
112   *
113   * <pre> -R &lt;index1,index2-index4,...&gt;
114   *  Specify list of columns to copy. First and last are valid
115   *  indexes. (default first-last)</pre>
116   *
117   <!-- options-end -->
118   *
119   * @param options the list of options as an array of strings
120   * @throws Exception if an option is not supported
121   */
122  public void setOptions(String[] options) throws Exception {
123    String orderList = Utils.getOption('R', options);
124    if (orderList.length() != 0) {
125      setAttributeIndices(orderList);
126    }
127   
128    if (getInputFormat() != null) {
129      setInputFormat(getInputFormat());
130    }
131  }
132
133  /**
134   * Gets the current settings of the filter.
135   *
136   * @return an array of strings suitable for passing to setOptions
137   */
138  public String [] getOptions() {
139    String [] options = new String [2];
140    int current = 0;
141
142    if (!getAttributeIndices().equals("")) {
143      options[current++] = "-R"; 
144      options[current++] = getAttributeIndices();
145    }
146
147    while (current < options.length) {
148      options[current++] = "";
149    }
150    return options;
151  }
152 
153  /**
154   * parses the index string and returns the corresponding int index
155   *
156   * @param s                   the index string to parse
157   * @param numAttributes       necessary for "last" and OutOfBounds checks
158   * @return                    the int index determined form the index string
159   * @throws Exception          if index is not valid
160   */
161  protected int determineIndex(String s, int numAttributes) throws Exception {
162    int         result;
163   
164    if (s.equals("first"))
165      result = 0;
166    else if (s.equals("last"))
167      result = numAttributes - 1;
168    else
169      result = Integer.parseInt(s) - 1;
170   
171    // out of bounds?
172    if ( (result < 0) || (result > numAttributes - 1) )
173      throw new IllegalArgumentException(
174          "'" + s + "' is not a valid index for the range '1-" + numAttributes + "'!");
175   
176    return result;
177  }
178 
179  /**
180   * parses the range string and returns an array with the indices
181   *
182   * @param numAttributes       necessary for "last" and OutOfBounds checks
183   * @return                    the indices determined form the range string
184   * @see                       #m_NewOrderCols
185   * @throws Exception          if range is not valid
186   */
187  protected int[] determineIndices(int numAttributes) throws Exception {
188    int[]               result;
189    Vector<Integer>     list;
190    int                 i;
191    StringTokenizer     tok;
192    String              token;
193    String[]            range;
194    int                 from;
195    int                 to;
196   
197    list = new Vector<Integer>();
198   
199    // parse range
200    tok = new StringTokenizer(m_NewOrderCols, ",");
201    while (tok.hasMoreTokens()) {
202      token = tok.nextToken();
203      if (token.indexOf("-") > -1) {
204        range = token.split("-");
205        if (range.length != 2)
206          throw new IllegalArgumentException("'" + token + "' is not a valid range!");
207        from = determineIndex(range[0], numAttributes);
208        to   = determineIndex(range[1], numAttributes);
209
210        if (from <= to) {
211          for (i = from; i <= to; i++)
212            list.add(i);
213        }
214        else {
215          for (i = from; i >= to; i--)
216            list.add(i);
217        }
218      }
219      else {
220        list.add(determineIndex(token, numAttributes));
221      }
222    }
223   
224    // turn vector into int array
225    result = new int[list.size()];
226    for (i = 0; i < list.size(); i++)
227      result[i] = list.get(i);
228   
229    return result;
230  }
231
232  /**
233   * Returns the Capabilities of this filter.
234   *
235   * @return            the capabilities of this object
236   * @see               Capabilities
237   */
238  public Capabilities getCapabilities() {
239    Capabilities result = super.getCapabilities();
240    result.disableAll();
241
242    // attribute
243    result.enableAllAttributes();
244    result.enable(Capability.MISSING_VALUES);
245   
246    // class
247    result.enableAllClasses();
248    result.enable(Capability.NO_CLASS);
249    result.enable(Capability.MISSING_CLASS_VALUES);
250   
251    return result;
252  }
253
254  /**
255   * Sets the format of the input instances.
256   *
257   * @param instanceInfo an Instances object containing the input instance
258   * structure (any instances contained in the object are ignored - only the
259   * structure is required).
260   * @return true if the outputFormat may be collected immediately
261   * @throws Exception if a problem occurs setting the input format
262   */
263  public boolean setInputFormat(Instances instanceInfo) throws Exception {
264    super.setInputFormat(instanceInfo);
265   
266    FastVector attributes = new FastVector();
267    int outputClass = -1;
268    m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());
269    for (int i = 0; i < m_SelectedAttributes.length; i++) {
270      int current = m_SelectedAttributes[i];
271      if (instanceInfo.classIndex() == current) {
272        outputClass = attributes.size();
273      }
274      Attribute keep = (Attribute)instanceInfo.attribute(current).copy();
275      attributes.addElement(keep);
276    }
277   
278    initInputLocators(instanceInfo, m_SelectedAttributes);
279
280    Instances outputFormat = new Instances(instanceInfo.relationName(),
281                                           attributes, 0); 
282    outputFormat.setClassIndex(outputClass);
283    setOutputFormat(outputFormat);
284   
285    return true;
286  }
287 
288
289  /**
290   * Input an instance for filtering. Ordinarily the instance is processed
291   * and made available for output immediately. Some filters require all
292   * instances be read before producing output.
293   *
294   * @param instance the input instance
295   * @return true if the filtered instance may now be
296   * collected with output().
297   * @throws IllegalStateException if no input format has been defined.
298   */
299  public boolean input(Instance instance) {
300    if (getInputFormat() == null) {
301      throw new IllegalStateException("No input instance format defined");
302    }
303    if (m_NewBatch) {
304      resetQueue();
305      m_NewBatch = false;
306    }
307
308    double[] vals = new double[outputFormatPeek().numAttributes()];
309    for (int i = 0; i < m_SelectedAttributes.length; i++) {
310      int current = m_SelectedAttributes[i];
311      vals[i] = instance.value(current);
312    }
313    Instance inst = null;
314    if (instance instanceof SparseInstance)
315      inst = new SparseInstance(instance.weight(), vals);
316    else
317      inst = new DenseInstance(instance.weight(), vals);
318
319    inst.setDataset(getOutputFormat());
320    copyValues(inst, false, instance.dataset(), getOutputFormat());
321    inst.setDataset(getOutputFormat());
322   
323    push(inst);
324   
325    return true;
326  }
327
328  /**
329   * Returns a string describing this filter
330   *
331   * @return a description of the filter suitable for
332   * displaying in the explorer/experimenter gui
333   */
334  public String globalInfo() {
335    return 
336        "An instance filter that generates output with a new order of the "
337      + "attributes. Useful if one wants to move an attribute to the end to "
338      + "use it as class attribute (e.g. with using \"-R 2-last,1\").\n"
339      + "But it's not only possible to change the order of all the attributes, "
340      + "but also to leave out attributes. E.g. if you have 10 attributes, you "
341      + "can generate the following output order: 1,3,5,7,9,10 or 10,1-5.\n"
342      + "You can also duplicate attributes, e.g. for further processing later "
343      + "on: e.g. 1,1,1,4,4,4,2,2,2 where the second and the third column of "
344      + "each attribute are processed differently and the first one, i.e. the "
345      + "original one is kept.\n"
346      + "One can simply inverse the order of the attributes via 'last-first'.\n"
347      + "After appyling the filter, the index of the class attribute is the "
348      + "last attribute.";
349  }
350
351  /**
352   * Get the current range selection
353   *
354   * @return a string containing a comma separated list of ranges
355   */
356  public String getAttributeIndices() {
357    return m_NewOrderCols;
358  }
359
360  /**
361   * Returns the tip text for this property
362   *
363   * @return tip text for this property suitable for
364   * displaying in the explorer/experimenter gui
365   */
366  public String attributeIndicesTipText() {
367    return "Specify range of attributes to act on."
368      + " This is a comma separated list of attribute indices, with"
369      + " \"first\" and \"last\" valid values. Specify an inclusive"
370      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
371  }
372
373  /**
374   * Set which attributes are to be copied (or kept if invert is true)
375   *
376   * @param rangeList a string representing the list of attributes.  Since
377   * the string will typically come from a user, attributes are indexed from
378   * 1. <br>
379   * eg: first-3,5,6-last<br>
380   * Note: use this method before you call
381   * <code>setInputFormat(Instances)</code>, since the output format is
382   * determined in that method.
383   * @throws Exception if an invalid range list is supplied
384   */
385  public void setAttributeIndices(String rangeList) throws Exception {
386    // simple test
387    if (rangeList.replaceAll("[afilrst0-9\\-,]*", "").length() != 0)
388      throw new IllegalArgumentException("Not a valid range string!");
389   
390    m_NewOrderCols = rangeList;
391  }
392
393  /**
394   * Set which attributes are to be copied (or kept if invert is true)
395   *
396   * @param attributes an array containing indexes of attributes to select.
397   * Since the array will typically come from a program, attributes are indexed
398   * from 0.<br>
399   * Note: use this method before you call
400   * <code>setInputFormat(Instances)</code>, since the output format is
401   * determined in that method.
402   * @throws Exception if an invalid set of ranges is supplied
403   */
404  public void setAttributeIndicesArray(int [] attributes) throws Exception {
405    setAttributeIndices(Range.indicesToRangeList(attributes));
406  }
407 
408  /**
409   * Returns the revision string.
410   *
411   * @return            the revision
412   */
413  public String getRevision() {
414    return RevisionUtils.extract("$Revision: 5987 $");
415  }
416
417  /**
418   * Main method for testing this class.
419   *
420   * @param argv should contain arguments to the filter: use -h for help
421   */
422  public static void main(String [] argv) {
423    runFilter(new Reorder(), argv);
424  }
425}
Note: See TracBrowser for help on using the repository browser.