source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/ChangeDateFormat.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.5 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ChangeDateFormat.java
19 *    Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.RevisionUtils;
35import weka.core.SingleIndex;
36import weka.core.UnsupportedAttributeTypeException;
37import weka.core.Utils;
38import weka.core.Capabilities.Capability;
39import weka.filters.Filter;
40import weka.filters.StreamableFilter;
41import weka.filters.UnsupervisedFilter;
42
43import java.text.ParseException;
44import java.text.SimpleDateFormat;
45import java.util.Enumeration;
46import java.util.Vector;
47
48/**
49 <!-- globalinfo-start -->
50 * Changes the date format used by a date attribute. This is most useful for converting to a format with less precision, for example, from an absolute date to day of year, etc. This changes the format string, and changes the date values to those that would be parsed by the new format.
51 * <p/>
52 <!-- globalinfo-end -->
53 *
54 <!-- options-start -->
55 * Valid options are: <p/>
56 *
57 * <pre> -C &lt;col&gt;
58 *  Sets the attribute index (default last).</pre>
59 *
60 * <pre> -F &lt;value index&gt;
61 *  Sets the output date format string (default corresponds to ISO-8601).</pre>
62 *
63 <!-- options-end -->
64 *
65 * @author <a href="mailto:len@reeltwo.com">Len Trigg</a>
66 * @version $Revision: 5987 $
67 */
68public class ChangeDateFormat 
69  extends Filter
70  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
71
72  /** for serialization */
73  static final long serialVersionUID = -1609344074013448737L;
74
75  /** The default output date format. Corresponds to ISO-8601 format. */
76  private static final SimpleDateFormat DEFAULT_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
77
78  /** The attribute's index setting. */
79  private SingleIndex m_AttIndex = new SingleIndex("last"); 
80
81  /** The output date format. */
82  private SimpleDateFormat m_DateFormat = DEFAULT_FORMAT;
83
84  /** The output attribute. */
85  private Attribute m_OutputAttribute;
86
87  /**
88   * Returns a string describing this filter
89   *
90   * @return a description of the filter suitable for
91   * displaying in the explorer/experimenter gui
92   */
93  public String globalInfo() {
94    return 
95        "Changes the date format used by a date attribute. This is most "
96      + "useful for converting to a format with less precision, for example, "
97      + "from an absolute date to day of year, etc. This changes the format "
98      + "string, and changes the date values to those that would be parsed "
99      + "by the new format.";
100  }
101
102  /**
103   * Returns the Capabilities of this filter.
104   *
105   * @return            the capabilities of this object
106   * @see               Capabilities
107   */
108  public Capabilities getCapabilities() {
109    Capabilities result = super.getCapabilities();
110    result.disableAll();
111
112    // attributes
113    result.enableAllAttributes();
114    result.enable(Capability.MISSING_VALUES);
115   
116    // class
117    result.enableAllClasses();
118    result.enable(Capability.MISSING_CLASS_VALUES);
119    result.enable(Capability.NO_CLASS);
120   
121    return result;
122  }
123
124  /**
125   * Sets the format of the input instances.
126   *
127   * @param instanceInfo an Instances object containing the input
128   * instance structure (any instances contained in the object are
129   * ignored - only the structure is required).
130   * @return true if the outputFormat may be collected immediately
131   * @throws Exception if the input format can't be set
132   * successfully
133   */
134  public boolean setInputFormat(Instances instanceInfo) 
135       throws Exception {
136
137    super.setInputFormat(instanceInfo);
138    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
139    if (!instanceInfo.attribute(m_AttIndex.getIndex()).isDate()) {
140      throw new UnsupportedAttributeTypeException("Chosen attribute not date.");
141    }
142
143    setOutputFormat();
144    return true;
145  }
146
147  /**
148   * Input an instance for filtering.
149   *
150   * @param instance the input instance
151   * @return true if the filtered instance may now be
152   * collected with output().
153   * @throws Exception if the input format was not set or the date format cannot
154   * be parsed
155   */
156  public boolean input(Instance instance) throws Exception {
157
158    if (getInputFormat() == null) {
159      throw new IllegalStateException("No input instance format defined");
160    }
161    if (m_NewBatch) {
162      resetQueue();
163      m_NewBatch = false;
164    }
165    Instance newInstance = (Instance)instance.copy();
166    int index = m_AttIndex.getIndex();
167    if (!newInstance.isMissing(index)) {
168      double value = instance.value(index);
169      try {
170        // Format and parse under the new format to force any required
171        // loss in precision.
172        value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value));
173      } catch (ParseException pe) {
174        throw new RuntimeException("Output date format couldn't parse its own output!!");
175      }
176      newInstance.setValue(index, value);
177    }
178    push(newInstance);
179    return true;
180  }
181
182  /**
183   * Returns an enumeration describing the available options
184   *
185   * @return an enumeration of all the available options
186   */
187  public Enumeration listOptions() {
188
189    Vector newVector = new Vector(2);
190
191    newVector.addElement(new Option(
192              "\tSets the attribute index (default last).",
193              "C", 1, "-C <col>"));
194
195    newVector.addElement(new Option(
196              "\tSets the output date format string (default corresponds to ISO-8601).",
197              "F", 1, "-F <value index>"));
198
199    return newVector.elements();
200  }
201
202  /**
203   * Parses a given list of options. <p/>
204   *
205   <!-- options-start -->
206   * Valid options are: <p/>
207   *
208   * <pre> -C &lt;col&gt;
209   *  Sets the attribute index (default last).</pre>
210   *
211   * <pre> -F &lt;value index&gt;
212   *  Sets the output date format string (default corresponds to ISO-8601).</pre>
213   *
214   <!-- options-end -->
215   *
216   * @param options the list of options as an array of strings
217   * @throws Exception if an option is not supported
218   */
219  public void setOptions(String[] options) throws Exception {
220   
221    String attIndex = Utils.getOption('C', options);
222    if (attIndex.length() != 0) {
223      setAttributeIndex(attIndex);
224    } else {
225      setAttributeIndex("last");
226    }
227
228    String formatString = Utils.getOption('F', options);
229    if (formatString.length() != 0) {
230      setDateFormat(formatString);
231    } else {
232      setDateFormat(DEFAULT_FORMAT);
233    }
234
235    if (getInputFormat() != null) {
236      setInputFormat(getInputFormat());
237    }
238  }
239 
240  /**
241   * Gets the current settings of the filter.
242   *
243   * @return an array of strings suitable for passing to setOptions
244   */
245  public String [] getOptions() {
246
247    String [] options = new String [4];
248    int current = 0;
249
250    options[current++] = "-C";
251    options[current++] = "" + getAttributeIndex();
252    options[current++] = "-F"; 
253    options[current++] = "" + getDateFormat().toPattern();
254    while (current < options.length) {
255      options[current++] = "";
256    }
257    return options;
258  }
259
260  /**
261   * @return tip text for this property suitable for
262   * displaying in the explorer/experimenter gui
263   */
264  public String attributeIndexTipText() {
265
266    return "Sets which attribute to process. This "
267      + "attribute must be of type date (\"first\" and \"last\" are valid values)";
268  }
269
270  /**
271   * Gets the index of the attribute converted.
272   *
273   * @return the index of the attribute
274   */
275  public String getAttributeIndex() {
276
277    return m_AttIndex.getSingleIndex();
278  }
279
280  /**
281   * Sets the index of the attribute used.
282   *
283   * @param attIndex the index of the attribute
284   */
285  public void setAttributeIndex(String attIndex) {
286   
287    m_AttIndex.setSingleIndex(attIndex);
288  }
289
290  /**
291   * @return tip text for this property suitable for
292   * displaying in the explorer/experimenter gui
293   */
294  public String dateFormatTipText() {
295
296    return "The date format to change to. This should be a "
297      + "format understood by Java's SimpleDateFormat class.";
298  }
299
300  /**
301   * Get the date format used in output.
302   *
303   * @return the output date format.
304   */
305  public SimpleDateFormat getDateFormat() {
306
307    return m_DateFormat;
308  }
309
310  /**
311   * Sets the output date format.
312   *
313   * @param dateFormat the output date format.
314   */
315  public void setDateFormat(String dateFormat) {
316
317    setDateFormat(new SimpleDateFormat(dateFormat));
318  }
319
320  /**
321   * Sets the output date format.
322   *
323   * @param dateFormat the output date format.
324   */
325  public void setDateFormat(SimpleDateFormat dateFormat) {
326    if (dateFormat == null) {
327      throw new NullPointerException();
328    }
329    m_DateFormat = dateFormat;
330  }
331
332  /**
333   * Set the output format. Changes the format of the specified date
334   * attribute.
335   */
336  private void setOutputFormat() {
337   
338    // Create new attributes
339    FastVector newAtts = new FastVector(getInputFormat().numAttributes());
340    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
341      Attribute att = getInputFormat().attribute(j);
342      if (j == m_AttIndex.getIndex()) {
343        newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern())); 
344      } else {
345        newAtts.addElement(att.copy()); 
346      }
347    }
348     
349    // Create new header
350    Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0);
351    newData.setClassIndex(getInputFormat().classIndex());
352    m_OutputAttribute = newData.attribute(m_AttIndex.getIndex());
353    setOutputFormat(newData);
354  }
355 
356  /**
357   * Returns the revision string.
358   *
359   * @return            the revision
360   */
361  public String getRevision() {
362    return RevisionUtils.extract("$Revision: 5987 $");
363  }
364 
365  /**
366   * Main method for testing this class.
367   *
368   * @param argv should contain arguments to the filter:
369   * use -h for help
370   */
371  public static void main(String [] argv) {
372    runFilter(new ChangeDateFormat(), argv);
373  }
374}
Note: See TracBrowser for help on using the repository browser.