source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/Center.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.6 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Center.java
19 * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Capabilities;
26import weka.core.Instance; 
27import weka.core.DenseInstance;
28import weka.core.Instances;
29import weka.core.RevisionUtils;
30import weka.core.SparseInstance;
31import weka.core.Utils;
32import weka.core.Capabilities.Capability;
33import weka.filters.Sourcable;
34import weka.filters.UnsupervisedFilter;
35
36/**
37 <!-- globalinfo-start -->
38 * Centers all numeric attributes in the given dataset to have zero mean (apart from the class attribute, if set).
39 * <p/>
40 <!-- globalinfo-end -->
41 *
42 <!-- options-start -->
43 * Valid options are: <p/>
44 *
45 * <pre> -unset-class-temporarily
46 *  Unsets the class index temporarily before the filter is
47 *  applied to the data.
48 *  (default: no)</pre>
49 *
50 <!-- options-end -->
51 *
52 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
53 * @author FracPete (fracpete at waikato dot ac dot nz)
54 * @version $Revision: 5987 $
55 */
56public class Center 
57  extends PotentialClassIgnorer
58  implements UnsupervisedFilter, Sourcable {
59
60  /** for serialization */
61  private static final long serialVersionUID = -9101338448900581023L;
62 
63  /** The means */
64  private double[] m_Means;
65
66  /**
67   * Returns a string describing this filter
68   *
69   * @return            a description of the filter suitable for
70   *                    displaying in the explorer/experimenter gui
71   */
72  public String globalInfo() {
73
74    return "Centers all numeric attributes in the given dataset "
75      + "to have zero mean (apart from the class attribute, if set).";
76  }
77
78  /**
79   * Returns the Capabilities of this filter.
80   *
81   * @return            the capabilities of this object
82   * @see               Capabilities
83   */
84  public Capabilities getCapabilities() {
85    Capabilities result = super.getCapabilities();
86    result.disableAll();
87
88    // attributes
89    result.enableAllAttributes();
90    result.enable(Capability.MISSING_VALUES);
91   
92    // class
93    result.enableAllClasses();
94    result.enable(Capability.MISSING_CLASS_VALUES);
95    result.enable(Capability.NO_CLASS);
96   
97    return result;
98  }
99
100  /**
101   * Sets the format of the input instances.
102   *
103   * @param instanceInfo        an Instances object containing the input
104   *                            instance structure (any instances contained
105   *                            in the object are ignored - only the structure
106   *                            is required).
107   * @return true               if the outputFormat may be collected immediately
108   * @throws Exception          if the input format can't be set successfully
109   */
110  public boolean setInputFormat(Instances instanceInfo) throws Exception {
111    super.setInputFormat(instanceInfo);
112    setOutputFormat(instanceInfo);
113    m_Means = null;
114    return true;
115  }
116
117  /**
118   * Input an instance for filtering. Filter requires all
119   * training instances be read before producing output.
120   *
121   * @param instance                    the input instance
122   * @return true                       if the filtered instance may now be
123   *                                    collected with output().
124   * @throws IllegalStateException      if no input format has been set.
125   */
126  public boolean input(Instance instance) {
127
128    if (getInputFormat() == null)
129      throw new IllegalStateException("No input instance format defined");
130
131    if (m_NewBatch) {
132      resetQueue();
133      m_NewBatch = false;
134    }
135   
136    if (m_Means == null) {
137      bufferInput(instance);
138      return false;
139    } 
140    else {
141      convertInstance(instance);
142      return true;
143    }
144  }
145
146  /**
147   * Signify that this batch of input to the filter is finished.
148   * If the filter requires all instances prior to filtering,
149   * output() may now be called to retrieve the filtered instances.
150   *
151   * @return true                       if there are instances pending output
152   * @throws IllegalStateException      if no input structure has been defined
153   */
154  public boolean batchFinished() {
155    if (getInputFormat() == null)
156      throw new IllegalStateException("No input instance format defined");
157   
158    if (m_Means == null) {
159      Instances input = getInputFormat();
160      m_Means = new double[input.numAttributes()];
161      for (int i = 0; i < input.numAttributes(); i++) {
162        if (input.attribute(i).isNumeric() &&
163            (input.classIndex() != i)) {
164          m_Means[i] = input.meanOrMode(i);
165        }
166      }
167
168      // Convert pending input instances
169      for (int i = 0; i < input.numInstances(); i++)
170        convertInstance(input.instance(i));
171    }
172   
173    // Free memory
174    flushInput();
175
176    m_NewBatch = true;
177    return (numPendingOutput() != 0);
178  }
179
180  /**
181   * Convert a single instance over. The converted instance is
182   * added to the end of the output queue.
183   *
184   * @param instance    the instance to convert
185   */
186  private void convertInstance(Instance instance) {
187    Instance inst = null;
188   
189    if (instance instanceof SparseInstance) {
190      double[] newVals = new double[instance.numAttributes()];
191      int[] newIndices = new int[instance.numAttributes()];
192      double[] vals = instance.toDoubleArray();
193      int ind = 0;
194      for (int j = 0; j < instance.numAttributes(); j++) {
195        double value;
196        if (instance.attribute(j).isNumeric() &&
197            (!Utils.isMissingValue(vals[j])) &&
198            (getInputFormat().classIndex() != j)) {
199         
200          value = vals[j] - m_Means[j];
201          if (value != 0.0) {
202            newVals[ind] = value;
203            newIndices[ind] = j;
204            ind++;
205          }
206        } else {
207          value = vals[j];
208          if (value != 0.0) {
209            newVals[ind] = value;
210            newIndices[ind] = j;
211            ind++;
212          }
213        }
214      } 
215      double[] tempVals = new double[ind];
216      int[] tempInd = new int[ind];
217      System.arraycopy(newVals, 0, tempVals, 0, ind);
218      System.arraycopy(newIndices, 0, tempInd, 0, ind);
219      inst = new SparseInstance(instance.weight(), tempVals, tempInd,
220                                instance.numAttributes());
221    } 
222    else {
223      double[] vals = instance.toDoubleArray();
224      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
225        if (instance.attribute(j).isNumeric() &&
226            (!Utils.isMissingValue(vals[j])) &&
227            (getInputFormat().classIndex() != j)) {
228          vals[j] = (vals[j] - m_Means[j]);
229        }
230      } 
231      inst = new DenseInstance(instance.weight(), vals);
232    }
233   
234    inst.setDataset(instance.dataset());
235   
236    push(inst);
237  }
238 
239  /**
240   * Returns a string that describes the filter as source. The
241   * filter will be contained in a class with the given name (there may
242   * be auxiliary classes),
243   * and will contain two methods with these signatures:
244   * <pre><code>
245   * // converts one row
246   * public static Object[] filter(Object[] i);
247   * // converts a full dataset (first dimension is row index)
248   * public static Object[][] filter(Object[][] i);
249   * </code></pre>
250   * where the array <code>i</code> contains elements that are either
251   * Double, String, with missing values represented as null. The generated
252   * code is public domain and comes with no warranty.
253   *
254   * @param className   the name that should be given to the source class.
255   * @param data        the dataset used for initializing the filter
256   * @return            the object source described by a string
257   * @throws Exception  if the source can't be computed
258   */
259  public String toSource(String className, Instances data) throws Exception {
260    StringBuffer        result;
261    boolean[]           process;
262    int                 i;
263   
264    result = new StringBuffer();
265   
266    // determine what attributes were processed
267    process = new boolean[data.numAttributes()];
268    for (i = 0; i < data.numAttributes(); i++) {
269      process[i] = (data.attribute(i).isNumeric() && (i != data.classIndex()));
270    }
271   
272    result.append("class " + className + " {\n");
273    result.append("\n");
274    result.append("  /** lists which attributes will be processed */\n");
275    result.append("  protected final static boolean[] PROCESS = new boolean[]{" + Utils.arrayToString(process) + "};\n");
276    result.append("\n");
277    result.append("  /** the computed means */\n");
278    result.append("  protected final static double[] MEANS = new double[]{" + Utils.arrayToString(m_Means) + "};\n");
279    result.append("\n");
280    result.append("  /**\n");
281    result.append("   * filters a single row\n");
282    result.append("   * \n");
283    result.append("   * @param i the row to process\n");
284    result.append("   * @return the processed row\n");
285    result.append("   */\n");
286    result.append("  public static Object[] filter(Object[] i) {\n");
287    result.append("    Object[] result;\n");
288    result.append("\n");
289    result.append("    result = new Object[i.length];\n");
290    result.append("    for (int n = 0; n < i.length; n++) {\n");
291    result.append("      if (PROCESS[n] && (i[n] != null))\n");
292    result.append("        result[n] = ((Double) i[n]) - MEANS[n];\n");
293    result.append("      else\n");
294    result.append("        result[n] = i[n];\n");
295    result.append("    }\n");
296    result.append("\n");
297    result.append("    return result;\n");
298    result.append("  }\n");
299    result.append("\n");
300    result.append("  /**\n");
301    result.append("   * filters multiple rows\n");
302    result.append("   * \n");
303    result.append("   * @param i the rows to process\n");
304    result.append("   * @return the processed rows\n");
305    result.append("   */\n");
306    result.append("  public static Object[][] filter(Object[][] i) {\n");
307    result.append("    Object[][] result;\n");
308    result.append("\n");
309    result.append("    result = new Object[i.length][];\n");
310    result.append("    for (int n = 0; n < i.length; n++) {\n");
311    result.append("      result[n] = filter(i[n]);\n");
312    result.append("    }\n");
313    result.append("\n");
314    result.append("    return result;\n");
315    result.append("  }\n");
316    result.append("}\n");
317   
318    return result.toString();
319  }
320 
321  /**
322   * Returns the revision string.
323   *
324   * @return            the revision
325   */
326  public String getRevision() {
327    return RevisionUtils.extract("$Revision: 5987 $");
328  }
329
330  /**
331   * Main method for running this filter.
332   *
333   * @param args        should contain arguments to the filter: use -h for help
334   */
335  public static void main(String [] args) {
336    runFilter(new Center(), args);
337  }
338}
Note: See TracBrowser for help on using the repository browser.