source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/PKIDiscretize.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    PKIDiscretize.java
19 *    Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.RevisionUtils;
28import weka.core.TechnicalInformation;
29import weka.core.TechnicalInformationHandler;
30import weka.core.Utils;
31import weka.core.TechnicalInformation.Field;
32import weka.core.TechnicalInformation.Type;
33
34import java.util.Enumeration;
35import java.util.Vector;
36
37/**
38 <!-- globalinfo-start -->
39 * Discretizes numeric attributes using equal frequency binning, where the number of bins is equal to the square root of the number of non-missing values.<br/>
40 * <br/>
41 * For more information, see:<br/>
42 * <br/>
43 * Ying Yang, Geoffrey I. Webb: Proportional k-Interval Discretization for Naive-Bayes Classifiers. In: 12th European Conference on Machine Learning, 564-575, 2001.
44 * <p/>
45 <!-- globalinfo-end -->
46 *
47 <!-- technical-bibtex-start -->
48 * BibTeX:
49 * <pre>
50 * &#64;inproceedings{Yang2001,
51 *    author = {Ying Yang and Geoffrey I. Webb},
52 *    booktitle = {12th European Conference on Machine Learning},
53 *    pages = {564-575},
54 *    publisher = {Springer},
55 *    series = {LNCS},
56 *    title = {Proportional k-Interval Discretization for Naive-Bayes Classifiers},
57 *    volume = {2167},
58 *    year = {2001}
59 * }
60 * </pre>
61 * <p/>
62 <!-- technical-bibtex-end -->
63 *
64 <!-- options-start -->
65 * Valid options are: <p/>
66 *
67 * <pre> -unset-class-temporarily
68 *  Unsets the class index temporarily before the filter is
69 *  applied to the data.
70 *  (default: no)</pre>
71 *
72 * <pre> -R &lt;col1,col2-col4,...&gt;
73 *  Specifies list of columns to Discretize. First and last are valid indexes.
74 *  (default: first-last)</pre>
75 *
76 * <pre> -V
77 *  Invert matching sense of column indexes.</pre>
78 *
79 * <pre> -D
80 *  Output binary attributes for discretized attributes.</pre>
81 *
82 <!-- options-end -->
83 *
84 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
85 * @version $Revision: 1.9 $
86 */
87public class PKIDiscretize 
88  extends Discretize
89  implements TechnicalInformationHandler {
90 
91  /** for serialization */
92  static final long serialVersionUID = 6153101248977702675L;
93
94  /**
95   * Sets the format of the input instances.
96   *
97   * @param instanceInfo an Instances object containing the input instance
98   * structure (any instances contained in the object are ignored - only the
99   * structure is required).
100   * @return true if the outputFormat may be collected immediately
101   * @throws Exception if the input format can't be set successfully
102   */
103  public boolean setInputFormat(Instances instanceInfo) throws Exception {
104
105    // alter child behaviour to do what we want
106    m_FindNumBins = true;
107    return super.setInputFormat(instanceInfo);
108  }
109
110  /**
111   * Finds the number of bins to use and creates the cut points.
112   *
113   * @param index the attribute index
114   */
115  protected void findNumBins(int index) {
116
117    Instances toFilter = getInputFormat();
118
119    // Find number of instances for attribute where not missing
120    int numOfInstances = toFilter.numInstances();
121    for (int i = 0; i < toFilter.numInstances(); i++) {
122      if (toFilter.instance(i).isMissing(index))
123        numOfInstances--;
124    }
125
126    m_NumBins = (int)(Math.sqrt(numOfInstances));
127
128    if (m_NumBins > 0) {
129      calculateCutPointsByEqualFrequencyBinning(index);
130    }
131  }
132
133  /**
134   * Gets an enumeration describing the available options.
135   *
136   * @return an enumeration of all the available options.
137   */
138  public Enumeration listOptions() {
139    Vector result = new Vector();
140   
141    result.addElement(new Option(
142        "\tUnsets the class index temporarily before the filter is\n"
143        + "\tapplied to the data.\n"
144        + "\t(default: no)",
145        "unset-class-temporarily", 1, "-unset-class-temporarily"));
146   
147    result.addElement(new Option(
148        "\tSpecifies list of columns to Discretize. First"
149        + " and last are valid indexes.\n"
150        + "\t(default: first-last)",
151        "R", 1, "-R <col1,col2-col4,...>"));
152   
153    result.addElement(new Option(
154        "\tInvert matching sense of column indexes.",
155        "V", 0, "-V"));
156   
157    result.addElement(new Option(
158        "\tOutput binary attributes for discretized attributes.",
159        "D", 0, "-D"));
160   
161    return result.elements();
162  }
163
164
165  /**
166   * Parses a given list of options. <p/>
167   *
168   <!-- options-start -->
169   * Valid options are: <p/>
170   *
171   * <pre> -unset-class-temporarily
172   *  Unsets the class index temporarily before the filter is
173   *  applied to the data.
174   *  (default: no)</pre>
175   *
176   * <pre> -R &lt;col1,col2-col4,...&gt;
177   *  Specifies list of columns to Discretize. First and last are valid indexes.
178   *  (default: first-last)</pre>
179   *
180   * <pre> -V
181   *  Invert matching sense of column indexes.</pre>
182   *
183   * <pre> -D
184   *  Output binary attributes for discretized attributes.</pre>
185   *
186   <!-- options-end -->
187   *
188   * @param options the list of options as an array of strings
189   * @throws Exception if an option is not supported
190   */
191  public void setOptions(String[] options) throws Exception {
192
193    setIgnoreClass(Utils.getFlag("unset-class-temporarily", options));
194    setMakeBinary(Utils.getFlag('D', options));
195    setInvertSelection(Utils.getFlag('V', options));
196   
197    String convertList = Utils.getOption('R', options);
198    if (convertList.length() != 0) {
199      setAttributeIndices(convertList);
200    } else {
201      setAttributeIndices("first-last");
202    }
203
204    if (getInputFormat() != null) {
205      setInputFormat(getInputFormat());
206    }
207  }
208  /**
209   * Gets the current settings of the filter.
210   *
211   * @return an array of strings suitable for passing to setOptions
212   */
213  public String[] getOptions() {
214    Vector        result;
215
216    result = new Vector();
217
218    if (getMakeBinary())
219      result.add("-D");
220   
221    if (getInvertSelection())
222      result.add("-V");
223   
224    if (!getAttributeIndices().equals("")) {
225      result.add("-R");
226      result.add(getAttributeIndices());
227    }
228
229    return (String[]) result.toArray(new String[result.size()]);
230  }
231
232  /**
233   * Returns a string describing this filter
234   *
235   * @return a description of the filter suitable for
236   * displaying in the explorer/experimenter gui
237   */
238  public String globalInfo() {
239
240    return "Discretizes numeric attributes using equal frequency binning,"
241      + " where the number of bins is equal to the square root of the"
242      + " number of non-missing values.\n\n"
243      + "For more information, see:\n\n"
244      + getTechnicalInformation().toString();
245  }
246
247  /**
248   * Returns an instance of a TechnicalInformation object, containing
249   * detailed information about the technical background of this class,
250   * e.g., paper reference or book this class is based on.
251   *
252   * @return the technical information about this class
253   */
254  public TechnicalInformation getTechnicalInformation() {
255    TechnicalInformation        result;
256   
257    result = new TechnicalInformation(Type.INPROCEEDINGS);
258    result.setValue(Field.AUTHOR, "Ying Yang and Geoffrey I. Webb");
259    result.setValue(Field.TITLE, "Proportional k-Interval Discretization for Naive-Bayes Classifiers");
260    result.setValue(Field.BOOKTITLE, "12th European Conference on Machine Learning");
261    result.setValue(Field.YEAR, "2001");
262    result.setValue(Field.PAGES, "564-575");
263    result.setValue(Field.PUBLISHER, "Springer");
264    result.setValue(Field.SERIES, "LNCS");
265    result.setValue(Field.VOLUME, "2167");
266   
267    return result;
268  }
269 
270  /**
271   * Returns the tip text for this property
272   *
273   * @return tip text for this property suitable for
274   * displaying in the explorer/experimenter gui
275   */
276  public String findNumBinsTipText() {
277
278    return "Ignored.";
279  }
280
281  /**
282   * Get the value of FindNumBins.
283   *
284   * @return Value of FindNumBins.
285   */
286  public boolean getFindNumBins() {
287   
288    return false;
289  }
290 
291  /**
292   * Set the value of FindNumBins.
293   *
294   * @param newFindNumBins Value to assign to FindNumBins.
295   */
296  public void setFindNumBins(boolean newFindNumBins) {
297   
298  }
299 
300  /**
301   * Returns the tip text for this property
302   *
303   * @return tip text for this property suitable for
304   * displaying in the explorer/experimenter gui
305   */
306  public String useEqualFrequencyTipText() {
307
308    return "Always true.";
309  }
310
311  /**
312   * Get the value of UseEqualFrequency.
313   *
314   * @return Value of UseEqualFrequency.
315   */
316  public boolean getUseEqualFrequency() {
317   
318    return true;
319  }
320 
321  /**
322   * Set the value of UseEqualFrequency.
323   *
324   * @param newUseEqualFrequency Value to assign to UseEqualFrequency.
325   */
326  public void setUseEqualFrequency(boolean newUseEqualFrequency) {
327   
328  }
329
330  /**
331   * Returns the tip text for this property
332   *
333   * @return tip text for this property suitable for
334   * displaying in the explorer/experimenter gui
335   */
336  public String binsTipText() {
337
338    return "Ignored.";
339  }
340
341  /**
342   * Ignored
343   *
344   * @return the number of bins.
345   */
346  public int getBins() {
347
348    return 0;
349  }
350
351  /**
352   * Ignored
353   *
354   * @param numBins the number of bins
355   */
356  public void setBins(int numBins) {
357
358  }
359 
360  /**
361   * Returns the revision string.
362   *
363   * @return            the revision
364   */
365  public String getRevision() {
366    return RevisionUtils.extract("$Revision: 1.9 $");
367  }
368
369  /**
370   * Main method for testing this class.
371   *
372   * @param argv should contain arguments to the filter: use -h for help
373   */
374  public static void main(String [] argv) {
375    runFilter(new PKIDiscretize(), argv);
376  }
377}
Note: See TracBrowser for help on using the repository browser.