source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/NominalToBinary.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 15.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    NominalToBinary.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23
24package weka.filters.unsupervised.attribute;
25
26import weka.core.Attribute;
27import weka.core.Capabilities;
28import weka.core.FastVector;
29import weka.core.Instance; 
30import weka.core.DenseInstance;
31import weka.core.DenseInstance;
32import weka.core.Instances;
33import weka.core.Option;
34import weka.core.OptionHandler;
35import weka.core.Range;
36import weka.core.RevisionUtils;
37import weka.core.SparseInstance;
38import weka.core.Utils;
39import weka.core.Capabilities.Capability;
40import weka.filters.Filter;
41import weka.filters.UnsupervisedFilter;
42
43import java.util.Enumeration;
44import java.util.Vector;
45
46/**
47 <!-- globalinfo-start -->
48 * Converts all nominal attributes into binary numeric attributes. An attribute with k values is transformed into k binary attributes if the class is nominal (using the one-attribute-per-value approach). Binary attributes are left binary, if option '-A' is not given. If the class is numeric, you might want to use the supervised version of this filter.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 <!-- options-start -->
53 * Valid options are: <p/>
54 *
55 * <pre> -N
56 *  Sets if binary attributes are to be coded as nominal ones.</pre>
57 *
58 * <pre> -A
59 *  For each nominal value a new attribute is created,
60 *  not only if there are more than 2 values.</pre>
61 *
62 * <pre> -R &lt;col1,col2-col4,...&gt;
63 *  Specifies list of columns to act on. First and last are
64 *  valid indexes.
65 *  (default: first-last)</pre>
66 *
67 * <pre> -V
68 *  Invert matching sense of column indexes.</pre>
69 *
70 <!-- options-end -->
71 *
72 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
73 * @version $Revision: 5987 $
74 */
75public class NominalToBinary 
76  extends Filter
77  implements UnsupervisedFilter, OptionHandler {
78 
79  /** for serialization */
80  static final long serialVersionUID = -1130642825710549138L;
81
82  /** Stores which columns to act on */
83  protected Range m_Columns = new Range();
84
85  /** Are the new attributes going to be nominal or numeric ones? */
86  private boolean m_Numeric = true;
87
88  /** Are all values transformed into new attributes? */
89  private boolean m_TransformAll = false;
90
91  /** Constructor - initialises the filter */
92  public NominalToBinary() {
93
94    setAttributeIndices("first-last");
95  }
96
97  /**
98   * Returns a string describing this filter
99   *
100   * @return a description of the filter suitable for
101   * displaying in the explorer/experimenter gui
102   */
103  public String globalInfo() {
104
105    return "Converts all nominal attributes into binary numeric attributes. An "
106      + "attribute with k values is transformed into k binary attributes if "
107      + "the class is nominal (using the one-attribute-per-value approach). "
108      + "Binary attributes are left binary, if option '-A' is not given."
109      + "If the class is numeric, you might want to use the supervised version of "
110      + "this filter.";
111  }
112
113  /**
114   * Returns the Capabilities of this filter.
115   *
116   * @return            the capabilities of this object
117   * @see               Capabilities
118   */
119  public Capabilities getCapabilities() {
120    Capabilities result = super.getCapabilities();
121    result.disableAll();
122
123    // attributes
124    result.enableAllAttributes();
125    result.enable(Capability.MISSING_VALUES);
126   
127    // class
128    result.enableAllClasses();
129    result.enable(Capability.MISSING_CLASS_VALUES);
130    result.enable(Capability.NO_CLASS);
131   
132    return result;
133  }
134
135  /**
136   * Sets the format of the input instances.
137   *
138   * @param instanceInfo an Instances object containing the input
139   * instance structure (any instances contained in the object are
140   * ignored - only the structure is required).
141   * @return true if the outputFormat may be collected immediately
142   * @throws Exception if the input format can't be set
143   * successfully
144   */
145  public boolean setInputFormat(Instances instanceInfo) 
146       throws Exception {
147
148    super.setInputFormat(instanceInfo);
149
150    m_Columns.setUpper(instanceInfo.numAttributes() - 1);
151
152    setOutputFormat();
153    return true;
154  }
155
156  /**
157   * Input an instance for filtering. Filter requires all
158   * training instances be read before producing output.
159   *
160   * @param instance the input instance
161   * @return true if the filtered instance may now be
162   * collected with output().
163   * @throws IllegalStateException if no input format has been set
164   */
165  public boolean input(Instance instance) {
166
167    if (getInputFormat() == null) {
168      throw new IllegalStateException("No input instance format defined");
169    }
170    if (m_NewBatch) {
171      resetQueue();
172      m_NewBatch = false;
173    }
174
175    convertInstance(instance);
176    return true;
177  }
178
179  /**
180   * Returns an enumeration describing the available options.
181   *
182   * @return an enumeration of all the available options.
183   */
184  public Enumeration listOptions() {
185
186    Vector newVector = new Vector(3);
187
188    newVector.addElement(new Option(
189        "\tSets if binary attributes are to be coded as nominal ones.",
190        "N", 0, "-N"));
191
192    newVector.addElement(new Option(
193        "\tFor each nominal value a new attribute is created, \n"
194        + "\tnot only if there are more than 2 values.",
195        "A", 0, "-A"));
196
197    newVector.addElement(new Option(
198        "\tSpecifies list of columns to act on. First and last are \n"
199        + "\tvalid indexes.\n"
200        + "\t(default: first-last)",
201        "R", 1, "-R <col1,col2-col4,...>"));
202
203    newVector.addElement(new Option(
204        "\tInvert matching sense of column indexes.",
205        "V", 0, "-V"));
206
207    return newVector.elements();
208  }
209
210
211  /**
212   * Parses a given list of options. <p/>
213   *
214   <!-- options-start -->
215   * Valid options are: <p/>
216   *
217   * <pre> -N
218   *  Sets if binary attributes are to be coded as nominal ones.</pre>
219   *
220   * <pre> -A
221   *  For each nominal value a new attribute is created,
222   *  not only if there are more than 2 values.</pre>
223   *
224   * <pre> -R &lt;col1,col2-col4,...&gt;
225   *  Specifies list of columns to act on. First and last are
226   *  valid indexes.
227   *  (default: first-last)</pre>
228   *
229   * <pre> -V
230   *  Invert matching sense of column indexes.</pre>
231   *
232   <!-- options-end -->
233   *
234   * @param options the list of options as an array of strings
235   * @throws Exception if an option is not supported
236   */
237  public void setOptions(String[] options) throws Exception {
238
239    setBinaryAttributesNominal(Utils.getFlag('N', options));
240
241    setTransformAllValues(Utils.getFlag('A', options));
242
243    String convertList = Utils.getOption('R', options);
244    if (convertList.length() != 0) {
245      setAttributeIndices(convertList);
246    } else {
247      setAttributeIndices("first-last");
248    }
249    setInvertSelection(Utils.getFlag('V', options));
250
251    if (getInputFormat() != null)
252      setInputFormat(getInputFormat());
253  }
254
255  /**
256   * Gets the current settings of the filter.
257   *
258   * @return an array of strings suitable for passing to setOptions
259   */
260  public String [] getOptions() {
261
262    String [] options = new String [4];
263    int current = 0;
264
265    if (getBinaryAttributesNominal()) {
266      options[current++] = "-N";
267    }
268
269    if (getTransformAllValues()) {
270      options[current++] = "-A";
271    }
272
273    if (!getAttributeIndices().equals("")) {
274      options[current++] = "-R"; options[current++] = getAttributeIndices();
275    }
276    if (getInvertSelection()) {
277      options[current++] = "-V";
278    }
279
280    while (current < options.length) {
281      options[current++] = "";
282    }
283    return options;
284  }
285
286  /**
287   * Returns the tip text for this property
288   *
289   * @return tip text for this property suitable for
290   * displaying in the explorer/experimenter gui
291   */
292  public String binaryAttributesNominalTipText() {
293    return "Whether resulting binary attributes will be nominal.";
294  }
295
296  /**
297   * Gets if binary attributes are to be treated as nominal ones.
298   *
299   * @return true if binary attributes are to be treated as nominal ones
300   */
301  public boolean getBinaryAttributesNominal() {
302
303    return !m_Numeric;
304  }
305
306  /**
307   * Sets if binary attributes are to be treates as nominal ones.
308   *
309   * @param bool true if binary attributes are to be treated as nominal ones
310   */
311  public void setBinaryAttributesNominal(boolean bool) {
312
313    m_Numeric = !bool;
314  }
315
316  /**
317   * Returns the tip text for this property
318   *
319   * @return tip text for this property suitable for
320   * displaying in the explorer/experimenter gui
321   */
322  public String transformAllValuesTipText() {
323    return "Whether all nominal values are turned into new attributes, not only if there are more than 2.";
324  }
325
326  /**
327   * Gets if all nominal values are turned into new attributes, not only if
328   * there are more than 2.
329   *
330   * @return true all nominal values are transformed into new attributes
331   */
332  public boolean getTransformAllValues() {
333
334    return m_TransformAll;
335  }
336
337  /**
338   * Sets whether all nominal values are transformed into new attributes, not
339   * just if there are more than 2.
340   *
341   * @param bool true if all nominal value are transformed into new attributes
342   */
343  public void setTransformAllValues(boolean bool) {
344
345    m_TransformAll = bool;
346  }
347
348  /**
349   * Returns the tip text for this property
350   *
351   * @return tip text for this property suitable for
352   * displaying in the explorer/experimenter gui
353   */
354  public String invertSelectionTipText() {
355
356    return "Set attribute selection mode. If false, only selected"
357      + " (numeric) attributes in the range will be discretized; if"
358      + " true, only non-selected attributes will be discretized.";
359  }
360
361  /**
362   * Gets whether the supplied columns are to be removed or kept
363   *
364   * @return true if the supplied columns will be kept
365   */
366  public boolean getInvertSelection() {
367
368    return m_Columns.getInvert();
369  }
370
371  /**
372   * Sets whether selected columns should be removed or kept. If true the
373   * selected columns are kept and unselected columns are deleted. If false
374   * selected columns are deleted and unselected columns are kept.
375   *
376   * @param invert the new invert setting
377   */
378  public void setInvertSelection(boolean invert) {
379
380    m_Columns.setInvert(invert);
381  }
382
383  /**
384   * Returns the tip text for this property
385   *
386   * @return tip text for this property suitable for
387   * displaying in the explorer/experimenter gui
388   */
389  public String attributeIndicesTipText() {
390    return "Specify range of attributes to act on."
391      + " This is a comma separated list of attribute indices, with"
392      + " \"first\" and \"last\" valid values. Specify an inclusive"
393      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
394  }
395
396  /**
397   * Gets the current range selection
398   *
399   * @return a string containing a comma separated list of ranges
400   */
401  public String getAttributeIndices() {
402
403    return m_Columns.getRanges();
404  }
405
406  /**
407   * Sets which attributes are to be acted on.
408   *
409   * @param rangeList a string representing the list of attributes. Since
410   * the string will typically come from a user, attributes are indexed from
411   * 1. <br>
412   * eg: first-3,5,6-last
413   * @throws IllegalArgumentException if an invalid range list is supplied
414   */
415  public void setAttributeIndices(String rangeList) {
416
417    m_Columns.setRanges(rangeList);
418  }
419
420  /**
421   * Set the output format if the class is nominal.
422   */
423  private void setOutputFormat() {
424
425    FastVector newAtts;
426    int newClassIndex;
427    StringBuffer attributeName;
428    Instances outputFormat;
429    FastVector vals;
430
431    // Compute new attributes
432
433    newClassIndex = getInputFormat().classIndex();
434    newAtts = new FastVector();
435    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
436      Attribute att = getInputFormat().attribute(j);
437      if (!att.isNominal() || (j == getInputFormat().classIndex()) ||
438          !m_Columns.isInRange(j)) {
439        newAtts.addElement(att.copy());
440      } else {
441        if ( (att.numValues() <= 2) && (!m_TransformAll) ) {
442          if (m_Numeric) {
443            newAtts.addElement(new Attribute(att.name()));
444          } else {
445            newAtts.addElement(att.copy());
446          }
447        } else {
448
449          if (newClassIndex >= 0 && j < getInputFormat().classIndex()) {
450            newClassIndex += att.numValues() - 1;
451          }
452
453          // Compute values for new attributes
454          for (int k = 0; k < att.numValues(); k++) {
455            attributeName = 
456              new StringBuffer(att.name() + "=");
457            attributeName.append(att.value(k));
458            if (m_Numeric) {
459              newAtts.
460                addElement(new Attribute(attributeName.toString()));
461            } else {
462              vals = new FastVector(2);
463              vals.addElement("f"); vals.addElement("t");
464              newAtts.
465                addElement(new Attribute(attributeName.toString(), vals));
466            }
467          }
468        }
469      }
470    }
471    outputFormat = new Instances(getInputFormat().relationName(),
472                                 newAtts, 0);
473    outputFormat.setClassIndex(newClassIndex);
474    setOutputFormat(outputFormat);
475  }
476
477  /**
478   * Convert a single instance over if the class is nominal. The converted
479   * instance is added to the end of the output queue.
480   *
481   * @param instance the instance to convert
482   */
483  private void convertInstance(Instance instance) {
484
485    double [] vals = new double [outputFormatPeek().numAttributes()];
486    int attSoFar = 0;
487
488    for(int j = 0; j < getInputFormat().numAttributes(); j++) {
489      Attribute att = getInputFormat().attribute(j);
490      if (!att.isNominal() || (j == getInputFormat().classIndex()) ||
491          !m_Columns.isInRange(j)) {
492        vals[attSoFar] = instance.value(j);
493        attSoFar++;
494      } else {
495        if ( (att.numValues() <= 2) && (!m_TransformAll) ) {
496          vals[attSoFar] = instance.value(j);
497          attSoFar++;
498        } else {
499          if (instance.isMissing(j)) {
500            for (int k = 0; k < att.numValues(); k++) {
501              vals[attSoFar + k] = instance.value(j);
502            }
503          } else {
504            for (int k = 0; k < att.numValues(); k++) {
505              if (k == (int)instance.value(j)) {
506                vals[attSoFar + k] = 1;
507              } else {
508                vals[attSoFar + k] = 0;
509              }
510            }
511          }
512          attSoFar += att.numValues();
513        }
514      }
515    }
516    Instance inst = null;
517    if (instance instanceof SparseInstance) {
518      inst = new SparseInstance(instance.weight(), vals);
519    } else {
520      inst = new DenseInstance(instance.weight(), vals);
521    }
522    inst.setDataset(getOutputFormat());
523    copyValues(inst, false, instance.dataset(), getOutputFormat());
524    inst.setDataset(getOutputFormat());
525    push(inst);
526  }
527 
528  /**
529   * Returns the revision string.
530   *
531   * @return            the revision
532   */
533  public String getRevision() {
534    return RevisionUtils.extract("$Revision: 5987 $");
535  }
536
537  /**
538   * Main method for testing this class.
539   *
540   * @param argv should contain arguments to the filter:
541   * use -h for help
542   */
543  public static void main(String [] argv) {
544    runFilter(new NominalToBinary(), argv);
545  }
546}
Note: See TracBrowser for help on using the repository browser.