source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/instance/NonSparseToSparse.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 9.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    NonSparseToSparse.java
19 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.instance;
25
26import java.util.Enumeration;
27import java.util.Vector;
28
29import weka.core.Attribute;
30import weka.core.Capabilities;
31import weka.core.FastVector;
32import weka.core.Instance;
33import weka.core.DenseInstance;
34import weka.core.Instances;
35import weka.core.Option;
36import weka.core.OptionHandler;
37import weka.core.RevisionUtils;
38import weka.core.SparseInstance;
39import weka.core.Utils;
40import weka.core.Capabilities.Capability;
41import weka.filters.Filter;
42import weka.filters.StreamableFilter;
43import weka.filters.UnsupervisedFilter;
44
45/**
46 <!-- globalinfo-start -->
47 * An instance filter that converts all incoming instances into sparse format.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
52 * @version $Revision: 5987 $
53 */
54public class NonSparseToSparse 
55  extends Filter
56  implements UnsupervisedFilter, StreamableFilter, OptionHandler {
57
  /** for serialization */
  static final long serialVersionUID = 4694489111366063852L;

  /**
   * Whether missing values are overwritten with zero before the instance is
   * converted to sparse format (set by the -M flag).
   */
  protected boolean m_encodeMissingAsZero = false;

  /**
   * Whether a dummy label is inserted in front of the declared values of
   * every nominal attribute (set by the -F flag).
   */
  protected boolean m_insertDummyNominalFirstValue = false;
64 
65  /**
66   * Returns a string describing this filter
67   *
68   * @return a description of the filter suitable for
69   * displaying in the explorer/experimenter gui
70   */
71  public String globalInfo() {
72    return "An instance filter that converts all incoming instances"
73      + " into sparse format.";
74  }
75
76  /**
77   * Returns the Capabilities of this filter.
78   *
79   * @return            the capabilities of this object
80   * @see               Capabilities
81   */
82  public Capabilities getCapabilities() {
83    Capabilities result = super.getCapabilities();
84    result.disableAll();
85
86    // attributes
87    result.enableAllAttributes();
88    result.enable(Capability.MISSING_VALUES);
89   
90    // class
91    result.enableAllClasses();
92    result.enable(Capability.MISSING_CLASS_VALUES);
93    result.enable(Capability.NO_CLASS);
94   
95    return result;
96  }
97 
98  /**
99   * Returns an enumeration describing the available options.
100   *
101   * @return an enumeration of all the available options.
102   */
103  public Enumeration listOptions() {
104    Vector result;
105   
106    result = new Vector();
107    result.add(new Option("\tTreat missing values as zero.",
108        "M", 0, "-M"));
109    result.add(new Option("\tAdd a dummy first value for nominal attributes.",
110        "F", 0, "-F"));
111   
112    return result.elements();
113  }
114 
115  public void setOptions(String[] options) throws Exception {
116    m_encodeMissingAsZero = Utils.getFlag('M', options);
117    m_insertDummyNominalFirstValue = Utils.getFlag('F', options);
118  }
119 
120  public String[] getOptions() {
121    Vector result = new Vector();
122   
123    if (m_encodeMissingAsZero) {
124      result.add("-M");     
125    }
126   
127    if (m_insertDummyNominalFirstValue) {
128      result.add("-F");
129    }
130   
131    return (String[]) result.toArray(new String[result.size()]);
132  }
133 
134  /**
135   * Set whether missing values should be treated in the same
136   * way as zeros
137   *
138   * @param m true if missing values are to be treated the same
139   * as zeros
140   */
141  public void setTreatMissingValuesAsZero(boolean m) {
142    m_encodeMissingAsZero = m;
143  }
144 
145  /**
146   * Get whether missing values are to be treated in the same
147   * way as zeros
148   *
149   * @return true if missing values are to be treated in the
150   * same way as zeros
151   */
152  public boolean getTreatMissingValuesAsZero() {
153    return m_encodeMissingAsZero;
154  }
155 
156  /**
157   * Returns the tip text for this property
158   *
159   * @return            tip text for this property suitable for
160   *                    displaying in the explorer/experimenter gui
161   */
162  public String treatMissingValuesAsZeroTipText() {
163    return "Treat missing values in the same way as zeros.";
164  }
165 
166  /**
167   * Set whether to insert a dummy first value in the definition
168   * for each nominal attribute or not.
169   *
170   * @param d true if a dummy value is to be inserted for
171   * each nominal attribute.
172   */
173  public void setInsertDummyNominalFirstValue(boolean d) {
174    m_insertDummyNominalFirstValue = d;
175  }
176 
177  /**
178   * Get whether a dummy first value will be inserted in the definition
179   * of each nominal attribute.
180   *
181   * @return true if a dummy first value will be inserted for each nominal
182   * attribute.
183   */
184  public boolean getInsertDummyNominalFirstValue() {
185    return m_insertDummyNominalFirstValue;
186  }
187 
188  /**
189   * Returns the tip text for this property
190   *
191   * @return            tip text for this property suitable for
192   *                    displaying in the explorer/experimenter gui
193   */
194  public String insertDummyNominalFirstValueTipText() {
195    return "Insert a dummy value before the first declared value "
196    + "for all nominal attributes. Useful when converting market "
197    + "basket data that has been encoded for Apriori to sparse format. "
198    + "Typically used in conjuction with treat missing values as zero.";
199   
200               
201  }
202
203  /**
204   * Sets the format of the input instances.
205   *
206   * @param instanceInfo an Instances object containing the input instance
207   * structure (any instances contained in the object are ignored - only the
208   * structure is required).
209   * @return true if the outputFormat may be collected immediately
210   * @throws Exception if format cannot be processed
211   */
212  public boolean setInputFormat(Instances instanceInfo) throws Exception {
213
214    super.setInputFormat(instanceInfo);
215    Instances instNew = instanceInfo;
216   
217    if (m_insertDummyNominalFirstValue) {
218      FastVector atts = new FastVector();
219      for (int i = 0; i < instanceInfo.numAttributes(); i++) {
220        if (instanceInfo.attribute(i).isNominal()) {
221          FastVector labels = new FastVector();
222          labels.addElement("_d");
223          for (int j = 0; j < instanceInfo.attribute(j).numValues(); j++) {
224            labels.addElement(instanceInfo.attribute(i).value(j));
225          }
226          Attribute newAtt = new Attribute(instanceInfo.attribute(i).name(), 
227              labels);
228          atts.addElement(newAtt);
229        } else {
230          atts.addElement(instanceInfo.attribute(i));
231        }
232      }
233      instNew = new Instances(instanceInfo.relationName(), atts, 0);
234    }
235   
236    setOutputFormat(instNew);
237    return true;
238  }
239
240
241  /**
242   * Input an instance for filtering. Ordinarily the instance is processed
243   * and made available for output immediately. Some filters require all
244   * instances be read before producing output.
245   *
246   * @param instance the input instance.
247   * @return true if the filtered instance may now be
248   * collected with output().
249   * @throws IllegalStateException if no input format has been set.
250   */
251  public boolean input(Instance instance) {
252
253    Instance newInstance = null;
254   
255    if (getInputFormat() == null) {
256      throw new IllegalStateException("No input instance format defined");
257    }
258    if (m_NewBatch) {
259      resetQueue();
260      m_NewBatch = false;
261    }
262   
263    if (m_encodeMissingAsZero && !m_insertDummyNominalFirstValue) {
264      Instance tempInst = (Instance)instance.copy();
265      tempInst.setDataset(getInputFormat());
266     
267      for (int i = 0; i < tempInst.numAttributes(); i++) {
268        if (tempInst.isMissing(i)) {
269          tempInst.setValue(i, 0);
270        }
271      }
272      instance = tempInst;
273    }
274   
275    if (m_insertDummyNominalFirstValue) {
276      double[] values = instance.toDoubleArray();     
277      for (int i = 0; i < instance.numAttributes(); i++) {
278        if (instance.attribute(i).isNominal()) {
279          if (!Utils.isMissingValue(values[i])) {
280            values[i]++;
281          }
282        }
283        if (m_encodeMissingAsZero && Utils.isMissingValue(values[i])) {
284          values[i] = 0;
285        }
286      }
287      newInstance = new SparseInstance(instance.weight(), values);
288      newInstance.setDataset(getOutputFormat());
289      push(newInstance);
290    } else {
291      newInstance = new SparseInstance(instance);
292      newInstance.setDataset(instance.dataset());
293      push(newInstance);
294    }
295   
296    /*Instance inst = new SparseInstance(instance);
297    inst.setDataset(instance.dataset());
298    push(inst); */
299    return true;
300  }
301 
302  /**
303   * Returns the revision string.
304   *
305   * @return            the revision
306   */
307  public String getRevision() {
308    return RevisionUtils.extract("$Revision: 5987 $");
309  }
310
311  /**
312   * Main method for testing this class.
313   *
314   * @param argv should contain arguments to the filter: use -h for help
315   */
316  public static void main(String [] argv) {
317    runFilter(new NonSparseToSparse(), argv);
318  }
319}
Note: See TracBrowser for help on using the repository browser.