source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/AddID.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * AddID.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.filters.unsupervised.attribute;
23
24import weka.core.Attribute;
25import weka.core.Capabilities;
26import weka.core.Instance; 
27import weka.core.DenseInstance;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.RevisionUtils;
32import weka.core.SingleIndex;
33import weka.core.Utils;
34import weka.core.Capabilities.Capability;
35import weka.filters.Filter;
36import weka.filters.UnsupervisedFilter;
37
38import java.util.Enumeration;
39import java.util.Vector;
40
41/**
42 <!-- globalinfo-start -->
43 * An instance filter that adds an ID attribute to the dataset. The new attribute contains a unique ID for each instance.<br/>
44 * Note: The ID is not reset for the second batch of files (using -b and -r and -s).
45 * <p/>
46 <!-- globalinfo-end -->
47 *
48 <!-- options-start -->
49 * Valid options are: <p/>
50 *
51 * <pre> -C &lt;index&gt;
52 *  Specify where to insert the ID. First and last
53 *  are valid indexes.(default first)</pre>
54 *
55 * <pre> -N &lt;name&gt;
56 *  Name of the new attribute.
57 *  (default = 'ID')</pre>
58 *
59 <!-- options-end -->
60 *
61 * @author  fracpete (fracpete at waikato dot ac dot nz)
62 * @version $Revision: 5987 $
63 */
64public class AddID
65  extends Filter
66  implements UnsupervisedFilter, OptionHandler {
67 
68  /** for serialization */
69  static final long serialVersionUID = 4734383199819293390L;
70
71  /** the index of the attribute */
72  protected SingleIndex m_Index = new SingleIndex("first");
73
74  /** the name of the attribute */
75  protected String m_Name = "ID";
76 
77  /** the counter for the ID */
78  protected int m_Counter = -1;
79 
80  /**
81   * Returns a string describing this filter
82   *
83   * @return            a description of the filter suitable for
84   *                    displaying in the explorer/experimenter gui
85   */
86  public String globalInfo() {
87    return 
88        "An instance filter that adds an ID attribute to the dataset. "
89      + "The new attribute contains a unique ID for each instance.\n"
90      + "Note: The ID is not reset for the second batch of files (using -b "
91      + "and -r and -s).";
92  }
93
94  /**
95   * Returns an enumeration describing the available options.
96   *
97   * @return an enumeration of all the available options.
98   */
99  public Enumeration listOptions() {
100    Vector result = new Vector();
101
102    result.addElement(new Option(
103              "\tSpecify where to insert the ID. First and last\n"
104              +"\tare valid indexes.(default first)",
105              "C", 1, "-C <index>"));
106
107    result.addElement(new Option(
108              "\tName of the new attribute.\n"
109              +"\t(default = 'ID')",
110              "N", 1,"-N <name>"));
111
112    return result.elements();
113  }
114
115
116  /**
117   * Parses a given list of options. <p/>
118   *
119   <!-- options-start -->
120   * Valid options are: <p/>
121   *
122   * <pre> -C &lt;index&gt;
123   *  Specify where to insert the ID. First and last
124   *  are valid indexes.(default first)</pre>
125   *
126   * <pre> -N &lt;name&gt;
127   *  Name of the new attribute.
128   *  (default = 'ID')</pre>
129   *
130   <!-- options-end -->
131   *
132   * @param options the list of options as an array of strings
133   * @throws Exception if an option is not supported
134   */
135  public void setOptions(String[] options) throws Exception {
136    String      tmpStr;
137
138    tmpStr = Utils.getOption('C', options);
139    if (tmpStr.length() != 0)
140      m_Index.setSingleIndex(tmpStr);
141    else
142      m_Index.setSingleIndex("first");
143   
144    tmpStr = Utils.getOption('N', options);
145    if (tmpStr.length() != 0)
146      m_Name = tmpStr;
147    else
148      m_Name = "ID";
149
150    if (getInputFormat() != null)
151      setInputFormat(getInputFormat());
152  }
153
154  /**
155   * Gets the current settings of the filter.
156   *
157   * @return an array of strings suitable for passing to setOptions
158   */
159  public String[] getOptions() {
160    Vector      result;
161   
162    result = new Vector();
163
164    result.add("-C");
165    result.add(getIDIndex());
166
167    result.add("-N");
168    result.add(getAttributeName());
169   
170    return (String[]) result.toArray(new String[result.size()]);
171  }
172
173  /**
174   * Returns the tip text for this property
175   *
176   * @return tip text for this property suitable for
177   * displaying in the explorer/experimenter gui
178   */
179  public String attributeNameTipText() {
180    return "Set the new attribute's name.";
181  }
182
183  /**
184   * Get the name of the attribute to be created
185   *
186   * @return the current attribute name
187   */
188  public String getAttributeName() {
189    return m_Name;
190  }
191
192  /**
193   * Set the new attribute's name
194   *
195   * @param value the new name
196   */
197  public void setAttributeName(String value) {
198    m_Name = value;
199  }
200
201  /**
202   * Returns the tip text for this property
203   *
204   * @return tip text for this property suitable for
205   * displaying in the explorer/experimenter gui
206   */
207  public String IDIndexTipText() {
208    return 
209        "The position (starting from 1) where the attribute will be inserted "
210      + "(first and last are valid indices).";
211  }
212
213  /**
214   * Get the index of the attribute used.
215   *
216   * @return the index of the attribute
217   */
218  public String getIDIndex() {
219    return m_Index.getSingleIndex();
220  }
221
222  /**
223   * Sets index of the attribute used.
224   *
225   * @param value the index of the attribute
226   */
227  public void setIDIndex(String value) {
228    m_Index.setSingleIndex(value);
229  }
230
231  /**
232   * Returns the Capabilities of this filter.
233   *
234   * @return            the capabilities of this object
235   * @see               Capabilities
236   */
237  public Capabilities getCapabilities() {
238    Capabilities result = super.getCapabilities();
239    result.disableAll();
240
241    // attributes
242    result.enableAllAttributes();
243    result.enable(Capability.MISSING_VALUES);
244   
245    // class
246    result.enableAllClasses();
247    result.enable(Capability.MISSING_CLASS_VALUES);
248    result.enable(Capability.NO_CLASS);
249   
250    return result;
251  }
252
253  /**
254   * Sets the format of the input instances.
255   *
256   * @param instanceInfo an Instances object containing the input instance
257   * structure (any instances contained in the object are ignored - only the
258   * structure is required).
259   * @return true if the outputFormat may be collected immediately
260   * @throws Exception if the format couldn't be set successfully
261   */
262  public boolean setInputFormat(Instances instanceInfo) throws Exception {
263    Instances           outputFormat;
264    Attribute           newAttribute;
265
266    super.setInputFormat(instanceInfo);
267
268    m_Counter = -1;
269    m_Index.setUpper(instanceInfo.numAttributes());
270    outputFormat = new Instances(instanceInfo, 0);
271    newAttribute = new Attribute(m_Name);
272
273    if ((m_Index.getIndex() < 0) || 
274        (m_Index.getIndex() > getInputFormat().numAttributes()))
275      throw new IllegalArgumentException("Index out of range");
276   
277    outputFormat.insertAttributeAt(newAttribute, m_Index.getIndex());
278    setOutputFormat(outputFormat);
279   
280    return true;
281  }
282
283  /**
284   * Input an instance for filtering. Filter requires all
285   * training instances be read before producing output.
286   *
287   * @param instance the input instance
288   * @return true if the filtered instance may now be
289   * collected with output().
290   * @throws IllegalStateException if no input format has been set.
291   */
292  public boolean input(Instance instance) {
293    if (getInputFormat() == null)
294      throw new IllegalStateException("No input instance format defined");
295
296    if (m_NewBatch) {
297      resetQueue();
298      m_NewBatch = false;
299    }
300   
301    if (!isFirstBatchDone()) {
302      bufferInput(instance);
303      return false;
304    } 
305    else {
306      convertInstance(instance);
307      return true;
308    }
309  }
310
311  /**
312   * Signify that this batch of input to the filter is finished.
313   * If the filter requires all instances prior to filtering,
314   * output() may now be called to retrieve the filtered instances.
315   *
316   * @return true if there are instances pending output
317   * @throws IllegalStateException if no input structure has been defined
318   */
319  public boolean batchFinished() {
320    if (getInputFormat() == null)
321      throw new IllegalStateException("No input instance format defined");
322
323    if (!isFirstBatchDone()) {
324      m_Counter = 0;
325     
326      // Convert pending input instances
327      for (int i = 0; i < getInputFormat().numInstances(); i++)
328        convertInstance(getInputFormat().instance(i));
329    } 
330   
331    // Free memory
332    flushInput();
333
334    m_NewBatch = true;
335    m_FirstBatchDone = true;
336   
337    return (numPendingOutput() != 0);
338  }
339
340  /**
341   * Convert a single instance over. The converted instance is
342   * added to the end of the output queue.
343   *
344   * @param instance the instance to convert
345   */
346  protected void convertInstance(Instance instance) {
347    Instance            inst;
348   
349    m_Counter++;
350
351    // build instance
352    try {
353      inst = (Instance)instance.copy();
354
355      // First copy string values from input to output
356      copyValues(inst, true, inst.dataset(), getOutputFormat());
357
358      // Insert the new attribute and reassign to output
359      inst.setDataset(null);
360      inst.insertAttributeAt(m_Index.getIndex());
361      inst.setValue(m_Index.getIndex(), m_Counter);
362      inst.setDataset(getOutputFormat());
363
364      push(inst);
365    }
366    catch (Exception e) {
367      e.printStackTrace();
368    }
369  }
370 
371  /**
372   * Returns the revision string.
373   *
374   * @return            the revision
375   */
376  public String getRevision() {
377    return RevisionUtils.extract("$Revision: 5987 $");
378  }
379
380  /**
381   * Main method for testing this class.
382   *
383   * @param args should contain arguments to the filter: use -h for help
384   */
385  public static void main(String[] args) {
386    runFilter(new AddID(), args);
387  }
388}
Note: See TracBrowser for help on using the repository browser.