source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/NumericToBinary.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 7.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    NumericToBinary.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Attribute;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.RevisionUtils;
32import weka.core.SparseInstance;
33import weka.core.Capabilities.Capability;
34import weka.filters.StreamableFilter;
35import weka.filters.UnsupervisedFilter;
36
37/**
38 <!-- globalinfo-start -->
39 * Converts all numeric attributes into binary attributes (apart from the class attribute, if set): if the value of the numeric attribute is exactly zero, the value of the new attribute will be zero. If the value of the numeric attribute is missing, the value of the new attribute will be missing. Otherwise, the value of the new attribute will be one. The new attributes will be nominal.
40 * <p/>
41 <!-- globalinfo-end -->
42 *
43 <!-- options-start -->
44 * Valid options are: <p/>
45 *
46 * <pre> -unset-class-temporarily
47 *  Unsets the class index temporarily before the filter is
48 *  applied to the data.
49 *  (default: no)</pre>
50 *
51 <!-- options-end -->
52 *
53 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
54 * @version $Revision: 5987 $
55 */
56public class NumericToBinary 
57  extends PotentialClassIgnorer
58  implements UnsupervisedFilter, StreamableFilter {
59
60  /** for serialization */
61  static final long serialVersionUID = 2616879323359470802L;
62 
63  /**
64   * Returns a string describing this filter
65   *
66   * @return a description of the filter suitable for
67   * displaying in the explorer/experimenter gui
68   */
69  public String globalInfo() {
70
71    return "Converts all numeric attributes into binary attributes (apart from "
72      + "the class attribute, if set): if the value of the numeric attribute is "
73      + "exactly zero, the value of the new attribute will be zero. If the "
74      + "value of the numeric attribute is missing, the value of the new "
75      + "attribute will be missing. Otherwise, the value of the new "
76      + "attribute will be one. The new attributes will be nominal.";
77  }
78
79  /**
80   * Returns the Capabilities of this filter.
81   *
82   * @return            the capabilities of this object
83   * @see               Capabilities
84   */
85  public Capabilities getCapabilities() {
86    Capabilities result = super.getCapabilities();
87    result.disableAll();
88
89    // attributes
90    result.enableAllAttributes();
91    result.enable(Capability.MISSING_VALUES);
92   
93    // class
94    result.enableAllClasses();
95    result.enable(Capability.MISSING_CLASS_VALUES);
96    result.enable(Capability.NO_CLASS);
97   
98    return result;
99  }
100
101  /**
102   * Sets the format of the input instances.
103   *
104   * @param instanceInfo an Instances object containing the input
105   * instance structure (any instances contained in the object are
106   * ignored - only the structure is required).
107   * @return true if the outputFormat may be collected immediately
108   * @throws Exception if the input format can't be set
109   * successfully
110   */
111  public boolean setInputFormat(Instances instanceInfo) throws Exception {
112
113    super.setInputFormat(instanceInfo);
114    setOutputFormat();
115    return true;
116  }
117
118  /**
119   * Input an instance for filtering.
120   *
121   * @param instance the input instance
122   * @return true if the filtered instance may now be
123   * collected with output().
124   * @throws IllegalStateException if no input format has been defined.
125   */
126  public boolean input(Instance instance) {
127
128    if (getInputFormat() == null) {
129      throw new IllegalStateException("No input instance format defined");
130    }
131    if (m_NewBatch) {
132      resetQueue();
133      m_NewBatch = false;
134    }
135    convertInstance(instance);
136    return true;
137  }
138
139  /**
140   * Set the output format.
141   */
142  private void setOutputFormat() {
143
144    FastVector newAtts;
145    int newClassIndex;
146    StringBuffer attributeName;
147    Instances outputFormat;
148    FastVector vals;
149
150    // Compute new attributes
151    newClassIndex = getInputFormat().classIndex();
152    newAtts = new FastVector();
153    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
154      Attribute att = getInputFormat().attribute(j);
155      if ((j == newClassIndex) || (!att.isNumeric())) {
156        newAtts.addElement(att.copy());
157      } else {
158        attributeName = new StringBuffer(att.name() + "_binarized");
159        vals = new FastVector(2);
160        vals.addElement("0"); vals.addElement("1");
161        newAtts.addElement(new Attribute(attributeName.toString(), vals));
162      }
163    }
164    outputFormat = new Instances(getInputFormat().relationName(), newAtts, 0);
165    outputFormat.setClassIndex(newClassIndex);
166    setOutputFormat(outputFormat);
167  }
168
169  /**
170   * Convert a single instance over. The converted instance is
171   * added to the end of the output queue.
172   *
173   * @param instance the instance to convert
174   */
175  private void convertInstance(Instance instance) {
176 
177    Instance inst = null;
178    if (instance instanceof SparseInstance) {
179      double[] vals = new double[instance.numValues()];
180      int[] newIndices = new int[instance.numValues()];
181      for (int j = 0; j < instance.numValues(); j++) {
182        Attribute att = getInputFormat().attribute(instance.index(j));
183        if ((!att.isNumeric()) || (instance.index(j) == getInputFormat().classIndex())) {
184          vals[j] = instance.valueSparse(j);
185        } else {
186          if (instance.isMissingSparse(j)) {
187            vals[j] = instance.valueSparse(j);
188          } else {
189            vals[j] = 1;
190          }
191        } 
192        newIndices[j] = instance.index(j);
193      }
194      inst = new SparseInstance(instance.weight(), vals, newIndices, 
195                                outputFormatPeek().numAttributes());
196    } else {
197      double[] vals = new double[outputFormatPeek().numAttributes()];
198      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
199        Attribute att = getInputFormat().attribute(j);
200        if ((!att.isNumeric()) || (j == getInputFormat().classIndex())) {
201          vals[j] = instance.value(j);
202        } else {
203          if (instance.isMissing(j) || (instance.value(j) == 0)) {
204            vals[j] = instance.value(j);
205          } else {
206            vals[j] = 1;
207          }
208        } 
209      }
210      inst = new DenseInstance(instance.weight(), vals);
211    }
212    inst.setDataset(instance.dataset());
213    push(inst);
214  }
215 
216  /**
217   * Returns the revision string.
218   *
219   * @return            the revision
220   */
221  public String getRevision() {
222    return RevisionUtils.extract("$Revision: 5987 $");
223  }
224
225  /**
226   * Main method for testing this class.
227   *
228   * @param argv should contain arguments to the filter:
229   * use -h for help
230   */
231  public static void main(String [] argv) {
232    runFilter(new NumericToBinary(), argv);
233  }
234}
Note: See TracBrowser for help on using the repository browser.