source: branches/MetisMQI/src/main/java/weka/filters/supervised/attribute/ClassOrder.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 15.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ClassOrder.java
19 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
20 *
21 */
22package weka.filters.supervised.attribute;
23
24import weka.core.Attribute;
25import weka.core.Capabilities;
26import weka.core.FastVector;
27import weka.core.Instance;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.RevisionUtils;
32import weka.core.Utils;
33import weka.core.Capabilities.Capability;
34import weka.filters.Filter;
35import weka.filters.SupervisedFilter;
36
37import java.util.Enumeration;
38import java.util.Random;
39import java.util.Vector;
40
41/**
42 <!-- globalinfo-start -->
43 * Changes the order of the classes so that the class values are no longer of in the order specified in the header. The values will be in the order specified by the user -- it could be either in ascending/descending order by the class frequency or in random order. Note that this filter currently does not change the header, only the class values of the instances, so there is not much point in using it in conjunction with the FilteredClassifier. The value can also be converted back using 'originalValue(double value)' procedure.
44 * <p/>
45 <!-- globalinfo-end -->
46 *
47 <!-- options-start -->
48 * Valid options are: <p/>
49 *
50 * <pre> -R &lt;seed&gt;
51 *  Specify the seed of randomization
52 *  used to randomize the class
53 *  order (default: 1)</pre>
54 *
55 * <pre> -C &lt;order&gt;
56 *  Specify the class order to be
57 *  sorted, could be 0: ascending
58 *  1: descending and 2: random.(default: 0)</pre>
59 *
60 <!-- options-end -->
61 *
62 * @author Xin Xu (xx5@cs.waikato.ac.nz)
63 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
64 * @version $Revision: 5491 $
65 */
66public class ClassOrder 
67  extends Filter
68  implements SupervisedFilter, OptionHandler {
69   
70  /** for serialization */
71  static final long serialVersionUID = -2116226838887628411L;
72 
73  /** The seed of randomization */
74  private long m_Seed = 1;
75   
76  /** The random object */
77  private Random m_Random = null;
78   
79  /**
80   * The 1-1 converting table from the original class values
81   * to the new values
82   */
83  private int[] m_Converter = null;
84   
85  /** Class attribute of the data */
86  private Attribute m_ClassAttribute = null;
87
88  /** The class order to be sorted */
89  private int m_ClassOrder = 0;
90   
91  /** The class values are sorted in ascending order based on their frequencies */
92  public static final int FREQ_ASCEND = 0;
93
94  /** The class values are sorted in descending order based on their frequencies */
95  public static final int FREQ_DESCEND = 1;
96   
97  /** The class values are sorted in random order*/
98  public static final int RANDOM =2;
99
100  /** This class can provide the class distribution in the sorted order
101   *  as side effect */
102  private double[] m_ClassCounts = null;
103
104  /**
105   * Returns a string describing this filter
106   *
107   * @return a description of the filter suitable for
108   * displaying in the explorer/experimenter gui
109   */
110  public String globalInfo() {
111
112    return "Changes the order of the classes so that the class values are " 
113      + "no longer of in the order specified in the header. "
114      + "The values will be in the order specified by the user "
115      + "-- it could be either in ascending/descending order by the class "
116      + "frequency or in random order. Note that this filter currently does not "
117      + "change the header, only the class values of the instances, "
118      + "so there is not much point in using it in conjunction with the "
119      + "FilteredClassifier. The value can also be converted back using "
120      + "'originalValue(double value)' procedure.";
121  }
122   
123  /**
124   * Returns an enumeration describing the available options.
125   *
126   * @return an enumeration of all the available options.
127   */
128  public Enumeration listOptions() {
129       
130    Vector newVector = new Vector(1);
131       
132    newVector.addElement(new Option("\tSpecify the seed of randomization\n"
133                                    + "\tused to randomize the class\n"
134                                    + "\torder (default: 1)",
135                                    "R", 1, "-R <seed>"));
136       
137    newVector.addElement(new Option("\tSpecify the class order to be\n"
138                                    + "\tsorted, could be 0: ascending\n"
139                                    + "\t1: descending and 2: random.(default: 0)",
140                                    "C", 1, "-C <order>"));
141       
142    return newVector.elements();
143  }
144   
145   
146  /**
147   * Parses a given list of options. <p/>
148   *
149   <!-- options-start -->
150   * Valid options are: <p/>
151   *
152   * <pre> -R &lt;seed&gt;
153   *  Specify the seed of randomization
154   *  used to randomize the class
155   *  order (default: 1)</pre>
156   *
157   * <pre> -C &lt;order&gt;
158   *  Specify the class order to be
159   *  sorted, could be 0: ascending
160   *  1: descending and 2: random.(default: 0)</pre>
161   *
162   <!-- options-end -->
163   *
164   * @param options the list of options as an array of strings
165   * @throws Exception if an option is not supported
166   */
167  public void setOptions(String[] options) throws Exception {
168       
169    String seedString = Utils.getOption('R', options);
170    if (seedString.length() != 0)
171      m_Seed = Long.parseLong(seedString);
172    else 
173      m_Seed = 1; 
174       
175    String orderString = Utils.getOption('C', options);
176    if (orderString.length() != 0)
177      m_ClassOrder = Integer.parseInt(orderString);
178    else 
179      m_ClassOrder = FREQ_ASCEND;       
180       
181    if (getInputFormat() != null)
182      setInputFormat(getInputFormat());         
183       
184    m_Random = null;
185  }
186       
187  /**
188   * Gets the current settings of the filter.
189   *
190   * @return an array of strings suitable for passing to setOptions
191   */
192  public String [] getOptions() {
193       
194    String [] options = new String [4];
195    int current = 0;
196       
197    options[current++] = "-R"; 
198    options[current++] = "" + m_Seed;
199    options[current++] = "-C"; 
200    options[current++] = "" + m_ClassOrder;
201       
202    while (current < options.length) {
203      options[current++] = "";
204    }
205    return options;
206  }
207   
208  /**
209   * Returns the tip text for this property
210   *
211   * @return tip text for this property suitable for
212   * displaying in the explorer/experimenter gui
213   */
214  public String seedTipText() {
215    return "Specify the seed of randomization of the class order";
216  }
217   
218  /**
219   * Get the current randomization seed
220   *
221   * @return a seed
222   */
223  public long getSeed() {       
224    return m_Seed;
225  }
226
227  /**
228   * Set randomization seed
229   *
230   * @param seed the set seed
231   */
232  public void setSeed(long seed){
233    m_Seed = seed;
234    m_Random = null;
235  }
236   
237  /**
238   * Returns the tip text for this property
239   *
240   * @return tip text for this property suitable for
241   * displaying in the explorer/experimenter gui
242   */
243  public String classOrderTipText() {
244    return "Specify the class order after the filtering";
245  }
246   
247  /**
248   * Get the wanted class order
249   *
250   * @return class order
251   */
252  public int getClassOrder() { 
253    return m_ClassOrder;
254  }
255   
256  /**
257   * Set the wanted class order
258   *
259   * @param order the class order
260   */
261  public void setClassOrder(int order){
262    m_ClassOrder = order;
263  }
264
265  /**
266   * Returns the Capabilities of this filter.
267   *
268   * @return            the capabilities of this object
269   * @see               Capabilities
270   */
271  public Capabilities getCapabilities() {
272    Capabilities result = super.getCapabilities();
273    result.disableAll();
274
275    // attributes
276    result.enableAllAttributes();
277    result.enable(Capability.MISSING_VALUES);
278   
279    // class
280    result.enable(Capability.NOMINAL_CLASS);
281   
282    return result;
283  }
284   
285  /**
286   * Sets the format of the input instances.
287   *
288   * @param instanceInfo an Instances object containing the input instance
289   * structure (any instances contained in the object are ignored - only the
290   * structure is required).
291   * @return true if the outputFormat may be collected immediately
292   * @throws Exception if no class index set or class not nominal
293   */
294  public boolean setInputFormat(Instances instanceInfo) throws Exception {     
295
296    super.setInputFormat(new Instances(instanceInfo, 0));       
297
298    m_ClassAttribute = instanceInfo.classAttribute();   
299    m_Random = new Random(m_Seed);
300    m_Converter = null;
301   
302    int numClasses = instanceInfo.numClasses();
303    m_ClassCounts = new double[numClasses];     
304    return false;
305  }   
306   
307  /**
308   * Input an instance for filtering. Ordinarily the instance is processed
309   * and made available for output immediately. Some filters require all
310   * instances be read before producing output.
311   *
312   * @param instance the input instance
313   * @return true if the filtered instance may now be
314   * collected with output().
315   * @throws IllegalStateException if no input format has been defined.
316   */
317  public boolean input(Instance instance) {
318       
319    if (getInputFormat() == null) {
320      throw new IllegalStateException("No input instance format defined");
321    }
322    if (m_NewBatch) {
323      resetQueue();
324      m_NewBatch = false;     
325    }   
326   
327    // In case some one use this routine in testing,
328    // although he/she should not do so
329    if(m_Converter != null){
330      Instance datum = (Instance)instance.copy();
331      if (!datum.isMissing(m_ClassAttribute)){
332        datum.setClassValue((double)m_Converter[(int)datum.classValue()]);
333      }
334      push(datum);
335      return true;
336    }
337   
338    if (!instance.isMissing(m_ClassAttribute)) {
339      m_ClassCounts[(int)instance.classValue()] += instance.weight();
340    }
341
342    bufferInput(instance);
343    return false;
344  }
345 
346  /**
347   * Signify that this batch of input to the filter is finished. If
348   * the filter requires all instances prior to filtering, output()
349   * may now be called to retrieve the filtered instances. Any
350   * subsequent instances filtered should be filtered based on setting
351   * obtained from the first batch (unless the inputFormat has been
352   * re-assigned or new options have been set). This implementation
353   * sorts the class values and provide class counts in the output format
354   *
355   * @return true if there are instances pending output
356   * @throws IllegalStateException if no input structure has been defined,
357   * @throws Exception if there was a problem finishing the batch.
358   */
359  public boolean batchFinished() throws Exception {
360
361    Instances data = getInputFormat();
362    if (data == null)
363      throw new IllegalStateException("No input instance format defined");
364
365    if (m_Converter == null) {
366
367      // Get randomized indices and class counts
368      int[] randomIndices = new int[m_ClassCounts.length];
369      for (int i = 0; i < randomIndices.length; i++) {
370        randomIndices[i] = i;
371      }
372      for (int j = randomIndices.length - 1; j > 0; j--) {
373        int toSwap = m_Random.nextInt(j + 1);
374        int tmpIndex = randomIndices[j];
375        randomIndices[j] = randomIndices[toSwap];
376        randomIndices[toSwap] = tmpIndex;
377      }
378     
379      double[] randomizedCounts = new double[m_ClassCounts.length];
380      for (int i = 0; i < randomizedCounts.length; i++) {
381        randomizedCounts[i] = m_ClassCounts[randomIndices[i]];
382      } 
383
384      // Create new order. For the moment m_Converter converts new indices
385      // into old ones.
386      if (m_ClassOrder == RANDOM) {
387        m_Converter = randomIndices;
388        m_ClassCounts = randomizedCounts;
389      } else {
390        int[] sorted = Utils.sort(randomizedCounts);
391        m_Converter = new int[sorted.length];
392        if (m_ClassOrder == FREQ_ASCEND) {
393          for (int i = 0; i < sorted.length; i++) {
394            m_Converter[i] = randomIndices[sorted[i]];
395          }
396        } else if (m_ClassOrder == FREQ_DESCEND) {
397          for (int i = 0; i < sorted.length; i++) {
398            m_Converter[i] = randomIndices[sorted[sorted.length - i - 1]];
399          }
400        } else {
401          throw new IllegalArgumentException("Class order not defined!");
402        }
403       
404        // Change class counts
405        double[] tmp2 = new double[m_ClassCounts.length];
406        for (int i = 0; i < m_Converter.length; i++) {
407          tmp2[i] = m_ClassCounts[m_Converter[i]];
408        }
409        m_ClassCounts = tmp2;
410      }
411     
412      // Change the class values
413      FastVector values = new FastVector(data.classAttribute().numValues());
414      for (int i = 0; i < data.numClasses(); i++) {
415        values.addElement(data.classAttribute().value(m_Converter[i]));
416      }
417      FastVector newVec = new FastVector(data.numAttributes());
418      for (int i = 0; i < data.numAttributes(); i++) {
419        if (i == data.classIndex()) {
420          newVec.addElement(new Attribute(data.classAttribute().name(), values, 
421                                          data.classAttribute().getMetadata()));
422        } else {
423          newVec.addElement(data.attribute(i));
424        }
425      }
426      Instances newInsts = new Instances(data.relationName(), newVec, 0);
427      newInsts.setClassIndex(data.classIndex());
428      setOutputFormat(newInsts);
429
430      // From now on we need m_Converter to convert old indices into new ones
431      int[] temp = new int[m_Converter.length];
432      for (int i = 0; i < temp.length; i++) {
433        temp[m_Converter[i]] = i;
434      }
435      m_Converter = temp;
436
437      // Process all instances
438      for(int xyz=0; xyz<data.numInstances(); xyz++){
439        Instance datum = data.instance(xyz);
440        if (!datum.isMissing(datum.classIndex())) {
441          datum.setClassValue((double)m_Converter[(int)datum.classValue()]);
442        }
443        push(datum);
444      }
445    }
446    flushInput();
447    m_NewBatch = true;
448    return (numPendingOutput() != 0);
449  }
450   
451  /**
452   * Get the class distribution of the sorted class values.  If class is numeric
453   * it returns null
454   *
455   * @return the class counts
456   */
457  public double[] getClassCounts(){ 
458
459    if(m_ClassAttribute.isNominal())
460      return m_ClassCounts; 
461    else
462      return null;
463  }
464
465  /**
466   * Convert the given class distribution back to the distributions
467   * with the original internal class index
468   *
469   * @param before the given class distribution
470   * @return the distribution converted back
471   */
472  public double[] distributionsByOriginalIndex (double[] before){
473
474    double[] after = new double[m_Converter.length];
475    for(int i=0; i < m_Converter.length; i++) 
476      after[i] = before[m_Converter[i]];
477   
478    return after;
479  }
480
481  /**
482   * Return the original internal class value given the randomized
483   * class value, i.e. the string presentations of the two indices
484   * are the same.  It's useful when the filter is used within a classifier 
485   * so that the filtering procedure should be transparent to the
486   * evaluation
487   *
488   * @param value the given value
489   * @return the original internal value, -1 if not found
490   * @throws Exception if the coverter table is not set yet
491   */
492  public double originalValue(double value)throws Exception{
493
494    if(m_Converter == null)
495      throw new IllegalStateException("Coverter table not defined yet!");
496       
497    for(int i=0; i < m_Converter.length; i++)
498      if((int)value == m_Converter[i])
499        return (double)i;
500
501    return -1;
502  }   
503 
504  /**
505   * Returns the revision string.
506   *
507   * @return            the revision
508   */
509  public String getRevision() {
510    return RevisionUtils.extract("$Revision: 5491 $");
511  }
512
513  /**
514   * Main method for testing this class.
515   *
516   * @param argv should contain arguments to the filter: use -h for help
517   */
518  public static void main(String [] argv) {
519    runFilter(new ClassOrder(), argv);
520  }
521}
Note: See TracBrowser for help on using the repository browser.