source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/Copy.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 11.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Copy.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.Range;
34import weka.core.RevisionUtils;
35import weka.core.SparseInstance;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.filters.Filter;
39import weka.filters.StreamableFilter;
40import weka.filters.UnsupervisedFilter;
41
42import java.util.Enumeration;
43import java.util.Vector;
44
45/**
46 <!-- globalinfo-start -->
47 * An instance filter that copies a range of attributes in the dataset. This is used in conjunction with other filters that overwrite attribute values during the course of their operation -- this filter allows the original attributes to be kept as well as the new attributes.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 <!-- options-start -->
52 * Valid options are: <p/>
53 *
54 * <pre> -R &lt;index1,index2-index4,...&gt;
55 *  Specify list of columns to copy. First and last are valid
56 *  indexes. (default none)</pre>
57 *
58 * <pre> -V
59 *  Invert matching sense (i.e. copy all non-specified columns)</pre>
60 *
61 <!-- options-end -->
62 *
63 * @author Len Trigg (trigg@cs.waikato.ac.nz)
64 * @version $Revision: 5987 $
65 */
66public class Copy 
67  extends Filter
68  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
69 
70  /** for serialization */
71  static final long serialVersionUID = -8543707493627441566L;
72
73  /** Stores which columns to copy */
74  protected Range m_CopyCols = new Range();
75
76  /**
77   * Stores the indexes of the selected attributes in order, once the
78   * dataset is seen
79   */
80  protected int [] m_SelectedAttributes;
81
82  /**
83   * Returns an enumeration describing the available options.
84   *
85   * @return an enumeration of all the available options.
86   */
87  public Enumeration listOptions() {
88
89    Vector newVector = new Vector(2);
90
91    newVector.addElement(new Option(
92              "\tSpecify list of columns to copy. First and last are valid\n"
93              +"\tindexes. (default none)",
94              "R", 1, "-R <index1,index2-index4,...>"));
95    newVector.addElement(new Option(
96              "\tInvert matching sense (i.e. copy all non-specified columns)",
97              "V", 0, "-V"));
98
99    return newVector.elements();
100  }
101
102  /**
103   * Parses a given list of options. <p/>
104   *
105   <!-- options-start -->
106   * Valid options are: <p/>
107   *
108   * <pre> -R &lt;index1,index2-index4,...&gt;
109   *  Specify list of columns to copy. First and last are valid
110   *  indexes. (default none)</pre>
111   *
112   * <pre> -V
113   *  Invert matching sense (i.e. copy all non-specified columns)</pre>
114   *
115   <!-- options-end -->
116   *
117   * @param options the list of options as an array of strings
118   * @throws Exception if an option is not supported
119   */
120  public void setOptions(String[] options) throws Exception {
121
122    String copyList = Utils.getOption('R', options);
123    if (copyList.length() != 0) {
124      setAttributeIndices(copyList);
125    }
126    setInvertSelection(Utils.getFlag('V', options));
127   
128    if (getInputFormat() != null) {
129      setInputFormat(getInputFormat());
130    }
131  }
132
133  /**
134   * Gets the current settings of the filter.
135   *
136   * @return an array of strings suitable for passing to setOptions
137   */
138  public String [] getOptions() {
139
140    String [] options = new String [3];
141    int current = 0;
142
143    if (getInvertSelection()) {
144      options[current++] = "-V";
145    }
146    if (!getAttributeIndices().equals("")) {
147      options[current++] = "-R"; options[current++] = getAttributeIndices();
148    }
149
150    while (current < options.length) {
151      options[current++] = "";
152    }
153    return options;
154  }
155
156  /**
157   * Returns the Capabilities of this filter.
158   *
159   * @return            the capabilities of this object
160   * @see               Capabilities
161   */
162  public Capabilities getCapabilities() {
163    Capabilities result = super.getCapabilities();
164    result.disableAll();
165
166    // attributes
167    result.enableAllAttributes();
168    result.enable(Capability.MISSING_VALUES);
169   
170    // class
171    result.enableAllClasses();
172    result.enable(Capability.MISSING_CLASS_VALUES);
173    result.enable(Capability.NO_CLASS);
174   
175    return result;
176  }
177
178  /**
179   * Sets the format of the input instances.
180   *
181   * @param instanceInfo an Instances object containing the input instance
182   * structure (any instances contained in the object are ignored - only the
183   * structure is required).
184   * @return true if the outputFormat may be collected immediately
185   * @throws Exception if a problem occurs setting the input format
186   */
187  public boolean setInputFormat(Instances instanceInfo) throws Exception {
188
189    super.setInputFormat(instanceInfo);
190   
191    m_CopyCols.setUpper(instanceInfo.numAttributes() - 1);
192
193    // Create the output buffer
194    Instances outputFormat = new Instances(instanceInfo, 0); 
195    m_SelectedAttributes = m_CopyCols.getSelection();
196    for (int i = 0; i < m_SelectedAttributes.length; i++) {
197      int current = m_SelectedAttributes[i];
198      // Create a copy of the attribute with a different name
199      Attribute origAttribute = instanceInfo.attribute(current);
200      outputFormat.insertAttributeAt((Attribute)origAttribute.copy(),
201                                     outputFormat.numAttributes());
202      outputFormat.renameAttribute(outputFormat.numAttributes() - 1,
203                                   "Copy of " + origAttribute.name());
204
205    }
206
207    // adapt locators
208    int[] newIndices = new int[instanceInfo.numAttributes() + m_SelectedAttributes.length];
209    for (int i = 0; i < instanceInfo.numAttributes(); i++)
210      newIndices[i] = i;
211    for (int i = 0; i < m_SelectedAttributes.length; i++)
212      newIndices[instanceInfo.numAttributes() + i] = m_SelectedAttributes[i];
213    initInputLocators(instanceInfo, newIndices);
214
215    setOutputFormat(outputFormat);
216   
217    return true;
218  }
219 
220
221  /**
222   * Input an instance for filtering. Ordinarily the instance is processed
223   * and made available for output immediately. Some filters require all
224   * instances be read before producing output.
225   *
226   * @param instance the input instance
227   * @return true if the filtered instance may now be
228   * collected with output().
229   * @throws IllegalStateException if no input format has been defined.
230   */
231  public boolean input(Instance instance) {
232
233    if (getInputFormat() == null) {
234      throw new IllegalStateException("No input instance format defined");
235    }
236    if (m_NewBatch) {
237      resetQueue();
238      m_NewBatch = false;
239    }
240
241    double[] vals = new double[outputFormatPeek().numAttributes()];
242    for(int i = 0; i < getInputFormat().numAttributes(); i++) {
243      vals[i] = instance.value(i);
244    }
245    int j = getInputFormat().numAttributes();
246    for (int i = 0; i < m_SelectedAttributes.length; i++) {
247      int current = m_SelectedAttributes[i];
248      vals[i + j] = instance.value(current);
249    }
250    Instance inst = null;
251    if (instance instanceof SparseInstance) {
252      inst = new SparseInstance(instance.weight(), vals);
253    } else {
254      inst = new DenseInstance(instance.weight(), vals);
255    }
256   
257    inst.setDataset(getOutputFormat());
258    copyValues(inst, false, instance.dataset(), getOutputFormat());
259    inst.setDataset(getOutputFormat());
260    push(inst);
261    return true;
262  }
263
264  /**
265   * Returns a string describing this filter
266   *
267   * @return a description of the filter suitable for
268   * displaying in the explorer/experimenter gui
269   */
270  public String globalInfo() {
271
272    return "An instance filter that copies a range of attributes in the"
273      + " dataset. This is used in conjunction with other filters that"
274      + " overwrite attribute values during the course of their operation --"
275      + " this filter allows the original attributes to be kept as well"
276      + " as the new attributes.";
277  }
278
279  /**
280   * Returns the tip text for this property
281   *
282   * @return tip text for this property suitable for
283   * displaying in the explorer/experimenter gui
284   */
285  public String invertSelectionTipText() {
286    return "Sets copy selected vs unselected action."
287      + " If set to false, only the specified attributes will be copied;"
288      + " If set to true, non-specified attributes will be copied.";
289  }
290
291  /**
292   * Get whether the supplied columns are to be removed or kept
293   *
294   * @return true if the supplied columns will be kept
295   */
296  public boolean getInvertSelection() {
297
298    return m_CopyCols.getInvert();
299  }
300
301  /**
302   * Set whether selected columns should be removed or kept. If true the
303   * selected columns are kept and unselected columns are copied. If false
304   * selected columns are copied and unselected columns are kept. <br>
305   * Note: use this method before you call
306   * <code>setInputFormat(Instances)</code>, since the output format is
307   * determined in that method.
308   *
309   * @param invert the new invert setting
310   */
311  public void setInvertSelection(boolean invert) {
312
313    m_CopyCols.setInvert(invert);
314  }
315
316  /**
317   * Get the current range selection
318   *
319   * @return a string containing a comma separated list of ranges
320   */
321  public String getAttributeIndices() {
322
323    return m_CopyCols.getRanges();
324  }
325
326  /**
327   * Returns the tip text for this property
328   *
329   * @return tip text for this property suitable for
330   * displaying in the explorer/experimenter gui
331   */
332  public String attributeIndicesTipText() {
333    return "Specify range of attributes to act on."
334      + " This is a comma separated list of attribute indices, with"
335      + " \"first\" and \"last\" valid values. Specify an inclusive"
336      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
337  }
338
339  /**
340   * Set which attributes are to be copied (or kept if invert is true)
341   *
342   * @param rangeList a string representing the list of attributes.  Since
343   * the string will typically come from a user, attributes are indexed from
344   * 1. <br>
345   * eg: first-3,5,6-last<br>
346   * Note: use this method before you call
347   * <code>setInputFormat(Instances)</code>, since the output format is
348   * determined in that method.
349   * @throws Exception if an invalid range list is supplied
350   */
351  public void setAttributeIndices(String rangeList) throws Exception {
352
353    m_CopyCols.setRanges(rangeList);
354  }
355
356  /**
357   * Set which attributes are to be copied (or kept if invert is true)
358   *
359   * @param attributes an array containing indexes of attributes to select.
360   * Since the array will typically come from a program, attributes are indexed
361   * from 0.<br>
362   * Note: use this method before you call
363   * <code>setInputFormat(Instances)</code>, since the output format is
364   * determined in that method.
365   * @throws Exception if an invalid set of ranges is supplied
366   */
367  public void setAttributeIndicesArray(int [] attributes) throws Exception {
368
369    setAttributeIndices(Range.indicesToRangeList(attributes));
370  }
371 
372  /**
373   * Returns the revision string.
374   *
375   * @return            the revision
376   */
377  public String getRevision() {
378    return RevisionUtils.extract("$Revision: 5987 $");
379  }
380
381  /**
382   * Main method for testing this class.
383   *
384   * @param argv should contain arguments to the filter: use -h for help
385   */
386  public static void main(String [] argv) {
387    runFilter(new Copy(), argv);
388  }
389}
Note: See TracBrowser for help on using the repository browser.