source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/Add.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 16.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Add.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.Range;
35import weka.core.RevisionUtils;
36import weka.core.SelectedTag;
37import weka.core.SingleIndex;
38import weka.core.Tag;
39import weka.core.Utils;
40import weka.core.Capabilities.Capability;
41import weka.filters.Filter;
42import weka.filters.StreamableFilter;
43import weka.filters.UnsupervisedFilter;
44
45import java.text.SimpleDateFormat;
46import java.util.Enumeration;
47import java.util.Vector;
48
49/**
50 <!-- globalinfo-start -->
51 * An instance filter that adds a new attribute to the dataset. The new attribute will contain all missing values.
52 * <p/>
53 <!-- globalinfo-end -->
54 *
55 <!-- options-start -->
56 * Valid options are: <p/>
57 *
58 * <pre> -T &lt;NUM|NOM|STR|DAT&gt;
59 *  The type of attribute to create:
60 *  NUM = Numeric attribute
61 *  NOM = Nominal attribute
62 *  STR = String attribute
63 *  DAT = Date attribute
64 *  (default: NUM)</pre>
65 *
66 * <pre> -C &lt;index&gt;
67 *  Specify where to insert the column. First and last
68 *  are valid indexes.(default: last)</pre>
69 *
70 * <pre> -N &lt;name&gt;
71 *  Name of the new attribute.
72 *  (default: 'Unnamed')</pre>
73 *
74 * <pre> -L &lt;label1,label2,...&gt;
75 *  Create nominal attribute with given labels
76 *  (default: numeric attribute)</pre>
77 *
78 * <pre> -F &lt;format&gt;
79 *  The format of the date values (see ISO-8601)
80 *  (default: yyyy-MM-dd'T'HH:mm:ss)</pre>
81 *
82 <!-- options-end -->
83 *
84 * @author Len Trigg (trigg@cs.waikato.ac.nz)
85 * @version $Revision: 5987 $
86 */
87public class Add 
88  extends Filter
89  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
90 
91  /** for serialization. */
92  static final long serialVersionUID = 761386447332932389L;
93
94  /** the attribute type. */
95  public static final Tag[] TAGS_TYPE = {
96    new Tag(Attribute.NUMERIC, "NUM", "Numeric attribute"),
97    new Tag(Attribute.NOMINAL, "NOM", "Nominal attribute"),
98    new Tag(Attribute.STRING,  "STR", "String attribute"),
99    new Tag(Attribute.DATE,    "DAT", "Date attribute")
100  };
101 
102  /** Record the type of attribute to insert. */
103  protected int m_AttributeType = Attribute.NUMERIC;
104
105  /** The name for the new attribute. */
106  protected String m_Name = "unnamed";
107
108  /** The location to insert the new attribute. */
109  private SingleIndex m_Insert = new SingleIndex("last"); 
110
111  /** The list of labels for nominal attribute. */
112  protected FastVector m_Labels = new FastVector();
113
114  /** The date format. */
115  protected String m_DateFormat = "yyyy-MM-dd'T'HH:mm:ss";
116 
117  /**
118   * Returns a string describing this filter.
119   *
120   * @return a description of the filter suitable for
121   * displaying in the explorer/experimenter gui
122   */
123  public String globalInfo() {
124
125    return "An instance filter that adds a new attribute to the dataset."
126      + " The new attribute will contain all missing values.";
127  }
128
129  /**
130   * Returns an enumeration describing the available options.
131   *
132   * @return an enumeration of all the available options.
133   */
134  public Enumeration listOptions() {
135    Vector              newVector;
136    String              desc;
137    SelectedTag         tag;
138    int                 i;
139
140    newVector = new Vector();
141
142    desc  = "";
143    for (i = 0; i < TAGS_TYPE.length; i++) {
144      tag = new SelectedTag(TAGS_TYPE[i].getID(), TAGS_TYPE);
145      desc  +=   "\t" + tag.getSelectedTag().getIDStr() 
146               + " = " + tag.getSelectedTag().getReadable()
147               + "\n";
148    }
149    newVector.addElement(new Option(
150        "\tThe type of attribute to create:\n"
151        + desc
152        +"\t(default: " + new SelectedTag(Attribute.NUMERIC, TAGS_TYPE) + ")",
153        "T", 1, "-T " + Tag.toOptionList(TAGS_TYPE)));
154
155    newVector.addElement(new Option(
156        "\tSpecify where to insert the column. First and last\n"
157        +"\tare valid indexes.(default: last)",
158        "C", 1, "-C <index>"));
159
160    newVector.addElement(new Option(
161        "\tName of the new attribute.\n"
162        +"\t(default: 'Unnamed')",
163        "N", 1,"-N <name>"));
164   
165    newVector.addElement(new Option(
166        "\tCreate nominal attribute with given labels\n"
167        +"\t(default: numeric attribute)",
168        "L", 1, "-L <label1,label2,...>"));
169
170    newVector.addElement(new Option(
171        "\tThe format of the date values (see ISO-8601)\n"
172        +"\t(default: yyyy-MM-dd'T'HH:mm:ss)",
173        "F", 1, "-F <format>"));
174
175    return newVector.elements();
176  }
177
178
179  /**
180   * Parses a given list of options. <p/>
181   *
182   <!-- options-start -->
183   * Valid options are: <p/>
184   *
185   * <pre> -T &lt;NUM|NOM|STR|DAT&gt;
186   *  The type of attribute to create:
187   *  NUM = Numeric attribute
188   *  NOM = Nominal attribute
189   *  STR = String attribute
190   *  DAT = Date attribute
191   *  (default: NUM)</pre>
192   *
193   * <pre> -C &lt;index&gt;
194   *  Specify where to insert the column. First and last
195   *  are valid indexes.(default: last)</pre>
196   *
197   * <pre> -N &lt;name&gt;
198   *  Name of the new attribute.
199   *  (default: 'Unnamed')</pre>
200   *
201   * <pre> -L &lt;label1,label2,...&gt;
202   *  Create nominal attribute with given labels
203   *  (default: numeric attribute)</pre>
204   *
205   * <pre> -F &lt;format&gt;
206   *  The format of the date values (see ISO-8601)
207   *  (default: yyyy-MM-dd'T'HH:mm:ss)</pre>
208   *
209   <!-- options-end -->
210   *
211   * @param options the list of options as an array of strings
212   * @throws Exception if an option is not supported
213   */
214  public void setOptions(String[] options) throws Exception {
215    String      tmpStr;
216
217    tmpStr = Utils.getOption('T', options);
218    if (tmpStr.length() != 0)
219      setAttributeType(new SelectedTag(tmpStr, TAGS_TYPE));
220    else
221      setAttributeType(new SelectedTag(Attribute.NUMERIC, TAGS_TYPE));
222   
223    tmpStr = Utils.getOption('C', options);
224    if (tmpStr.length() == 0)
225      tmpStr = "last";
226    setAttributeIndex(tmpStr);
227   
228    setAttributeName(Utils.unbackQuoteChars(Utils.getOption('N', options)));
229   
230    if (m_AttributeType == Attribute.NOMINAL) {
231      tmpStr = Utils.getOption('L', options);
232      if (tmpStr.length() != 0)
233        setNominalLabels(tmpStr);
234    }
235    else if (m_AttributeType == Attribute.DATE) {
236      tmpStr = Utils.getOption('F', options);
237      if (tmpStr.length() != 0)
238        setDateFormat(tmpStr);
239    }
240
241    if (getInputFormat() != null) {
242      setInputFormat(getInputFormat());
243    }
244  }
245
246  /**
247   * Gets the current settings of the filter.
248   *
249   * @return an array of strings suitable for passing to setOptions
250   */
251  public String [] getOptions() {
252    Vector<String>      result;
253   
254    result = new Vector<String>();
255   
256    if (m_AttributeType != Attribute.NUMERIC) {
257      result.add("-T");
258      result.add("" + getAttributeType());
259    }
260   
261    result.add("-N");
262    result.add(Utils.backQuoteChars(getAttributeName()));
263   
264    if (m_AttributeType == Attribute.NOMINAL) {
265      result.add("-L");
266      result.add(getNominalLabels());
267    }
268    else if (m_AttributeType == Attribute.NOMINAL) {
269      result.add("-F");
270      result.add(getDateFormat());
271    }
272   
273    result.add("-C");
274    result.add("" + getAttributeIndex());
275
276    return result.toArray(new String[result.size()]);
277  }
278
279  /**
280   * Returns the Capabilities of this filter.
281   *
282   * @return            the capabilities of this object
283   * @see               Capabilities
284   */
285  public Capabilities getCapabilities() {
286    Capabilities result = super.getCapabilities();
287    result.disableAll();
288
289    // attributes
290    result.enableAllAttributes();
291    result.enable(Capability.MISSING_VALUES);
292   
293    // class
294    result.enableAllClasses();
295    result.enable(Capability.MISSING_CLASS_VALUES);
296    result.enable(Capability.NO_CLASS);
297   
298    return result;
299  }
300
301  /**
302   * Sets the format of the input instances.
303   *
304   * @param instanceInfo an Instances object containing the input instance
305   * structure (any instances contained in the object are ignored - only the
306   * structure is required).
307   * @return true if the outputFormat may be collected immediately
308   * @throws Exception if the format couldn't be set successfully
309   */
310  public boolean setInputFormat(Instances instanceInfo) throws Exception {
311
312    super.setInputFormat(instanceInfo);
313
314    m_Insert.setUpper(instanceInfo.numAttributes());
315    Instances outputFormat = new Instances(instanceInfo, 0);
316    Attribute newAttribute = null;
317    switch (m_AttributeType) {
318      case Attribute.NUMERIC:
319        newAttribute = new Attribute(m_Name);
320        break;
321      case Attribute.NOMINAL:
322        newAttribute = new Attribute(m_Name, m_Labels);
323        break;
324      case Attribute.STRING:
325        newAttribute = new Attribute(m_Name, (FastVector) null);
326        break;
327      case Attribute.DATE:
328        newAttribute = new Attribute(m_Name, m_DateFormat);
329        break;
330      default:
331        throw new IllegalArgumentException("Unknown attribute type in Add");
332    }
333
334    if ((m_Insert.getIndex() < 0) || 
335        (m_Insert.getIndex() > getInputFormat().numAttributes())) {
336      throw new IllegalArgumentException("Index out of range");
337    }
338    outputFormat.insertAttributeAt(newAttribute, m_Insert.getIndex());
339    setOutputFormat(outputFormat);
340   
341    // all attributes, except index of added attribute
342    // (otherwise the length of the input/output indices differ)
343    Range atts = new Range(m_Insert.getSingleIndex());
344    atts.setInvert(true);
345    atts.setUpper(outputFormat.numAttributes() - 1);
346    initOutputLocators(outputFormat, atts.getSelection());
347   
348    return true;
349  }
350
351  /**
352   * Input an instance for filtering. Ordinarily the instance is processed
353   * and made available for output immediately. Some filters require all
354   * instances be read before producing output.
355   *
356   * @param instance the input instance
357   * @return true if the filtered instance may now be
358   * collected with output().
359   * @throws IllegalStateException if no input format has been defined.
360   */
361  public boolean input(Instance instance) {
362
363    if (getInputFormat() == null) {
364      throw new IllegalStateException("No input instance format defined");
365    }
366    if (m_NewBatch) {
367      resetQueue();
368      m_NewBatch = false;
369    }
370
371    Instance inst = (Instance)instance.copy();
372
373    // First copy string values from input to output
374    copyValues(inst, true, inst.dataset(), getOutputFormat());
375   
376    // Insert the new attribute and reassign to output
377    inst.setDataset(null);
378    inst.insertAttributeAt(m_Insert.getIndex());
379    inst.setDataset(getOutputFormat());
380    push(inst);
381    return true;
382  }
383
384  /**
385   * Returns the tip text for this property.
386   *
387   * @return tip text for this property suitable for
388   * displaying in the explorer/experimenter gui
389   */
390  public String attributeNameTipText() {
391
392    return "Set the new attribute's name.";
393  }
394
395  /**
396   * Get the name of the attribute to be created.
397   *
398   * @return the new attribute name
399   */
400  public String getAttributeName() {
401
402    return m_Name;
403  }
404
405  /**
406   * Set the new attribute's name.
407   *
408   * @param name the new name
409   */
410  public void setAttributeName(String name) {
411    if (name.trim().equals(""))
412      m_Name = "unnamed";
413    else
414      m_Name = name;
415  }
416
417  /**
418   * Returns the tip text for this property.
419   *
420   * @return tip text for this property suitable for
421   * displaying in the explorer/experimenter gui
422   */
423  public String attributeIndexTipText() {
424
425    return "The position (starting from 1) where the attribute will be inserted "
426      + "(first and last are valid indices).";
427  }
428
429  /**
430   * Get the index of the attribute used.
431   *
432   * @return the index of the attribute
433   */
434  public String getAttributeIndex() {
435
436    return m_Insert.getSingleIndex();
437  }
438
439  /**
440   * Sets index of the attribute used.
441   *
442   * @param attIndex the index of the attribute
443   */
444  public void setAttributeIndex(String attIndex) {
445   
446    m_Insert.setSingleIndex(attIndex);
447  }
448
449  /**
450   * Returns the tip text for this property.
451   *
452   * @return tip text for this property suitable for
453   * displaying in the explorer/experimenter gui
454   */
455  public String nominalLabelsTipText() {
456    return "The list of value labels (nominal attribute creation only). "
457      + " The list must be comma-separated, eg: \"red,green,blue\"."
458      + " If this is empty, the created attribute will be numeric.";
459  }
460
461  /**
462   * Get the list of labels for nominal attribute creation.
463   *
464   * @return the list of labels for nominal attribute creation
465   */
466  public String getNominalLabels() {
467
468    String labelList = "";
469    for(int i = 0; i < m_Labels.size(); i++) {
470      if (i == 0) {
471        labelList = (String)m_Labels.elementAt(i);
472      } else {
473        labelList += "," + (String)m_Labels.elementAt(i); 
474      }
475    }
476    return labelList;
477  }
478
479  /**
480   * Set the labels for nominal attribute creation.
481   *
482   * @param labelList a comma separated list of labels
483   * @throws IllegalArgumentException if the labelList was invalid
484   */
485  public void setNominalLabels(String labelList) {
486
487    FastVector labels = new FastVector (10);
488
489    // Split the labelList up into the vector
490    int commaLoc;
491    while ((commaLoc = labelList.indexOf(',')) >= 0) {
492      String label = labelList.substring(0, commaLoc).trim();
493      if (!label.equals("")) {
494        labels.addElement(label);
495      } else {
496        throw new IllegalArgumentException("Invalid label list at "+
497                                           labelList.substring(commaLoc));
498      }
499      labelList = labelList.substring(commaLoc + 1);
500    }
501    String label = labelList.trim();
502    if (!label.equals("")) {
503      labels.addElement(label);
504    }
505
506    // If everything is OK, make the type change
507    m_Labels = labels;
508    if (labels.size() == 0) {
509      m_AttributeType = Attribute.NUMERIC;
510    } else {
511      m_AttributeType = Attribute.NOMINAL; 
512    }
513  }
514
515  /**
516   * Returns the tip text for this property
517   *
518   * @return            tip text for this property suitable for
519   *                    displaying in the explorer/experimenter gui
520   */
521  public String attributeTypeTipText() {
522    return "Defines the type of the attribute to generate.";
523  }
524
525  /**
526   * Sets the type of attribute to generate.
527   *
528   * @param value       the attribute type
529   */
530  public void setAttributeType(SelectedTag value) {
531    if (value.getTags() == TAGS_TYPE) {
532      m_AttributeType = value.getSelectedTag().getID();
533    }
534  }
535
536  /**
537   * Gets the type of attribute to generate.
538   *
539   * @return            the current attribute type.
540   */
541  public SelectedTag getAttributeType() {
542    return new SelectedTag(m_AttributeType, TAGS_TYPE);
543  }
544
545  /**
546   * Returns the tip text for this property.
547   *
548   * @return            tip text for this property suitable for
549   *                    displaying in the explorer/experimenter gui
550   */
551  public String dateFormatTipText() {
552    return "The format of the date values (see ISO-8601).";
553  }
554
555  /**
556   * Get the date format, complying to ISO-8601.
557   *
558   * @return            the date format
559   */
560  public String getDateFormat() {
561    return m_DateFormat;
562  }
563
564  /**
565   * Set the date format, complying to ISO-8601.
566   *
567   * @param value       a comma separated list of labels
568   */
569  public void setDateFormat(String value) {
570    try {
571      new SimpleDateFormat(value);
572      m_DateFormat = value;
573    }
574    catch (Exception e) {
575      e.printStackTrace();
576    }
577  }
578 
579  /**
580   * Returns the revision string.
581   *
582   * @return            the revision
583   */
584  public String getRevision() {
585    return RevisionUtils.extract("$Revision: 5987 $");
586  }
587
588  /**
589   * Main method for testing this class.
590   *
591   * @param argv should contain arguments to the filter: use -h for help
592   */
593  public static void main(String [] argv) {
594    runFilter(new Add(), argv);
595  }
596}
Note: See TracBrowser for help on using the repository browser.