source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/AbstractTimeSeries.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 14.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AbstractTimeSeries.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import java.util.Enumeration;
26import java.util.Vector;
27import weka.core.Instance; 
28import weka.core.DenseInstance;
29import weka.core.Instances;
30import weka.core.Option;
31import weka.core.OptionHandler;
32import weka.core.Queue;
33import weka.core.Range;
34import weka.core.Utils;
35import weka.filters.Filter;
36import weka.filters.UnsupervisedFilter;
37
38/**
39 * An abstract instance filter that assumes instances form time-series data and
40 * performs some merging of attribute values in the current instance with
41 * attribute attribute values of some previous (or future) instance. For
42 * instances where the desired value is unknown either the instance may
43 * be dropped, or missing values used.<p>
44 *
45 * Valid filter-specific options are:<p>
46 *
47 * -R index1,index2-index4,...<br>
48 * Specify list of columns to calculate new values for.
49 * First and last are valid indexes.
50 * (default none)<p>
51 *
52 * -V <br>
53 * Invert matching sense (i.e. calculate for all non-specified columns)<p>
54 *
55 * -I num <br>
56 * The number of instances forward to merge values between.
57 * A negative number indicates taking values from a past instance.
58 * (default -1) <p>
59 *
60 * -M <br>
61 * For instances at the beginning or end of the dataset where the translated
62 * values are not known, remove those instances (default is to use missing
63 * values). <p>
64 *
65 * @author Len Trigg (trigg@cs.waikato.ac.nz)
66 * @version $Revision: 5987 $
67 */
68public abstract class AbstractTimeSeries
69  extends Filter
70  implements UnsupervisedFilter, OptionHandler {
71
72  /** for serialization */
73  private static final long serialVersionUID = -3795656792078022357L;
74
75  /** Stores which columns to copy */
76  protected Range m_SelectedCols = new Range();
77
78  /**
79   * True if missing values should be used rather than removing instances
80   * where the translated value is not known (due to border effects).
81   */
82  protected boolean m_FillWithMissing = true;
83
84  /**
85   * The number of instances forward to translate values between.
86   * A negative number indicates taking values from a past instance.
87   */
88  protected int m_InstanceRange = -1;
89
90  /** Stores the historical instances to copy values between */
91  protected Queue m_History;
92 
93  /**
94   * Returns an enumeration describing the available options.
95   *
96   * @return an enumeration of all the available options.
97   */
98  public Enumeration listOptions() {
99
100    Vector newVector = new Vector(4);
101
102    newVector.addElement(new Option(
103              "\tSpecify list of columns to translate in time. First and\n"
104              + "\tlast are valid indexes. (default none)",
105              "R", 1, "-R <index1,index2-index4,...>"));
106    newVector.addElement(new Option(
107              "\tInvert matching sense (i.e. calculate for all non-specified columns)",
108              "V", 0, "-V"));
109    newVector.addElement(new Option(
110              "\tThe number of instances forward to translate values\n"
111              + "\tbetween. A negative number indicates taking values from\n"
112              + "\ta past instance. (default -1)",
113              "I", 1, "-I <num>"));
114    newVector.addElement(new Option(
115              "\tFor instances at the beginning or end of the dataset where\n"
116              + "\tthe translated values are not known, remove those instances\n"
117              + "\t(default is to use missing values).",
118              "M", 0, "-M"));
119
120    return newVector.elements();
121  }
122
123  /**
124   * Parses a given list of options controlling the behaviour of this object.
125   * Valid options are:<p>
126   *
127   * -R index1,index2-index4,...<br>
128   * Specify list of columns to copy. First and last are valid indexes.
129   * (default none)<p>
130   *
131   * -V<br>
132   * Invert matching sense (i.e. calculate for all non-specified columns)<p>
133   *
134   * -I num <br>
135   * The number of instances forward to translate values between.
136   * A negative number indicates taking values from a past instance.
137   * (default -1) <p>
138   *
139   * -M <br>
140   * For instances at the beginning or end of the dataset where the translated
141   * values are not known, remove those instances (default is to use missing
142   * values). <p>
143   *
144   * @param options the list of options as an array of strings
145   * @throws Exception if an option is not supported
146   */
147  public void setOptions(String[] options) throws Exception {
148
149    String copyList = Utils.getOption('R', options);
150    if (copyList.length() != 0) {
151      setAttributeIndices(copyList);
152    } else {
153      setAttributeIndices("");
154    }
155   
156    setInvertSelection(Utils.getFlag('V', options));
157
158    setFillWithMissing(!Utils.getFlag('M', options));
159   
160    String instanceRange = Utils.getOption('I', options);
161    if (instanceRange.length() != 0) {
162      setInstanceRange(Integer.parseInt(instanceRange));
163    } else {
164      setInstanceRange(-1);
165    }
166
167    if (getInputFormat() != null) {
168      setInputFormat(getInputFormat());
169    }
170  }
171
172  /**
173   * Gets the current settings of the filter.
174   *
175   * @return an array of strings suitable for passing to setOptions
176   */
177  public String [] getOptions() {
178
179    String [] options = new String [6];
180    int current = 0;
181
182    if (!getAttributeIndices().equals("")) {
183      options[current++] = "-R"; options[current++] = getAttributeIndices();
184    }
185    if (getInvertSelection()) {
186      options[current++] = "-V";
187    }
188    options[current++] = "-I"; options[current++] = "" + getInstanceRange();
189    if (!getFillWithMissing()) {
190      options[current++] = "-M";
191    }
192
193    while (current < options.length) {
194      options[current++] = "";
195    }
196    return options;
197  }
198
199  /**
200   * Sets the format of the input instances.
201   *
202   * @param instanceInfo an Instances object containing the input instance
203   * structure (any instances contained in the object are ignored - only the
204   * structure is required).
205   * @return true if the outputFormat may be collected immediately
206   * @throws Exception if the format couldn't be set successfully
207   */
208  public boolean setInputFormat(Instances instanceInfo) throws Exception {
209
210    super.setInputFormat(instanceInfo);
211    resetHistory();
212    m_SelectedCols.setUpper(instanceInfo.numAttributes() - 1);
213    return false;
214  }
215 
216
217  /**
218   * Input an instance for filtering. Ordinarily the instance is processed
219   * and made available for output immediately. Some filters require all
220   * instances be read before producing output.
221   *
222   * @param instance the input instance
223   * @return true if the filtered instance may now be
224   * collected with output().
225   * @throws Exception if the input instance was not of the correct
226   * format or if there was a problem with the filtering.
227   */
228  public boolean input(Instance instance) throws Exception {
229
230    if (getInputFormat() == null) {
231      throw new NullPointerException("No input instance format defined");
232    }
233    if (m_NewBatch) {
234      resetQueue();
235      m_NewBatch = false;
236      resetHistory();
237    }
238
239    Instance newInstance = historyInput(instance);
240    if (newInstance != null) {
241      push(newInstance);
242      return true;
243    } else {
244      return false;
245    }
246  }
247
248  /**
249   * Signifies that this batch of input to the filter is finished. If the
250   * filter requires all instances prior to filtering, output() may now
251   * be called to retrieve the filtered instances.
252   *
253   * @return true if there are instances pending output
254   * @throws IllegalStateException if no input structure has been defined
255   */
256  public boolean batchFinished() {
257
258    if (getInputFormat() == null) {
259      throw new IllegalStateException("No input instance format defined");
260    }
261    if (getFillWithMissing() && (m_InstanceRange > 0)) {
262      while (!m_History.empty()) {
263        push(mergeInstances(null, (Instance) m_History.pop()));
264      }
265    } 
266    flushInput();
267    m_NewBatch = true;
268    m_FirstBatchDone = true;
269    return (numPendingOutput() != 0);
270  }
271
272  /**
273   * Returns the tip text for this property
274   * @return tip text for this property suitable for
275   * displaying in the explorer/experimenter gui
276   */
277  public String fillWithMissingTipText() {
278    return "For instances at the beginning or end of the dataset where the translated "
279      + "values are not known, use missing values (default is to remove those "
280      + "instances)";
281  }
282
283  /**
284   * Gets whether missing values should be used rather than removing instances
285   * where the translated value is not known (due to border effects).
286   *
287   * @return true if so
288   */
289  public boolean getFillWithMissing() {
290   
291    return m_FillWithMissing;
292  }
293 
294  /**
295   * Sets whether missing values should be used rather than removing instances
296   * where the translated value is not known (due to border effects).
297   *
298   * @param newFillWithMissing true if so
299   */
300  public void setFillWithMissing(boolean newFillWithMissing) {
301   
302    m_FillWithMissing = newFillWithMissing;
303  }
304
305  /**
306   * Returns the tip text for this property
307   * @return tip text for this property suitable for
308   * displaying in the explorer/experimenter gui
309   */
310  public String instanceRangeTipText() {
311    return "The number of instances forward/backward to merge values between. "
312      + "A negative number indicates taking values from a past instance.";
313  }
314
315  /**
316   * Gets the number of instances forward to translate values between.
317   * A negative number indicates taking values from a past instance.
318   *
319   * @return Value of InstanceRange.
320   */
321  public int getInstanceRange() {
322   
323    return m_InstanceRange;
324  }
325 
326  /**
327   * Sets the number of instances forward to translate values between.
328   * A negative number indicates taking values from a past instance.
329   *
330   * @param newInstanceRange Value to assign to InstanceRange.
331   */
332  public void setInstanceRange(int newInstanceRange) {
333   
334    m_InstanceRange = newInstanceRange;
335  }
336 
337  /**
338   * Returns the tip text for this property
339   * @return tip text for this property suitable for
340   * displaying in the explorer/experimenter gui
341   */
342  public String invertSelectionTipText() {
343    return "Invert matching sense. ie calculate for all non-specified columns.";
344  }
345
346  /**
347   * Get whether the supplied columns are to be removed or kept
348   *
349   * @return true if the supplied columns will be kept
350   */
351  public boolean getInvertSelection() {
352
353    return m_SelectedCols.getInvert();
354  }
355
356  /**
357   * Set whether selected columns should be removed or kept. If true the
358   * selected columns are kept and unselected columns are copied. If false
359   * selected columns are copied and unselected columns are kept.
360   *
361   * @param invert the new invert setting
362   */
363  public void setInvertSelection(boolean invert) {
364
365    m_SelectedCols.setInvert(invert);
366  }
367
368  /**
369   * Returns the tip text for this property
370   *
371   * @return tip text for this property suitable for
372   * displaying in the explorer/experimenter gui
373   */
374  public String attributeIndicesTipText() {
375    return "Specify range of attributes to act on."
376      + " This is a comma separated list of attribute indices, with"
377      + " \"first\" and \"last\" valid values. Specify an inclusive"
378      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
379  }
380
381  /**
382   * Get the current range selection
383   *
384   * @return a string containing a comma separated list of ranges
385   */
386  public String getAttributeIndices() {
387
388    return m_SelectedCols.getRanges();
389  }
390
391  /**
392   * Set which attributes are to be copied (or kept if invert is true)
393   *
394   * @param rangeList a string representing the list of attributes.  Since
395   * the string will typically come from a user, attributes are indexed from
396   * 1. <br>
397   * eg: first-3,5,6-last
398   */
399  public void setAttributeIndices(String rangeList) {
400
401    m_SelectedCols.setRanges(rangeList);
402  }
403
404  /**
405   * Set which attributes are to be copied (or kept if invert is true)
406   *
407   * @param attributes an array containing indexes of attributes to select.
408   * Since the array will typically come from a program, attributes are indexed
409   * from 0.
410   */
411  public void setAttributeIndicesArray(int [] attributes) {
412
413    setAttributeIndices(Range.indicesToRangeList(attributes));
414  }
415
416  /** Clears any instances from the history queue. */
417  protected void resetHistory() {
418
419    if (m_History == null) {
420      m_History = new Queue();
421    } else {
422      m_History.removeAllElements();
423    }
424  }
425
426  /**
427   * Adds an instance to the history buffer. If enough instances are in
428   * the buffer, a new instance may be output, with selected attribute
429   * values copied from one to another.
430   *
431   * @param instance the input instance
432   * @return a new instance with translated values, or null if no
433   * output instance is produced
434   */
435  protected Instance historyInput(Instance instance) {
436
437    m_History.push(instance);
438    if (m_History.size() <= Math.abs(m_InstanceRange)) {
439      if (getFillWithMissing() && (m_InstanceRange < 0)) {
440        return mergeInstances(null, instance);
441      } else {
442        return null;
443      }
444    }
445    if (m_InstanceRange < 0) {
446      return mergeInstances((Instance) m_History.pop(), instance);
447    } else {
448      return mergeInstances(instance, (Instance) m_History.pop());
449    }
450  }
451
452  /**
453   * Creates a new instance the same as one instance (the "destination")
454   * but with some attribute values copied from another instance
455   * (the "source")
456   *
457   * @param source the source instance
458   * @param dest the destination instance
459   * @return the new merged instance
460   */
461  protected abstract Instance mergeInstances(Instance source, Instance dest);
462 
463}
Note: See TracBrowser for help on using the repository browser.