source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/SortLabels.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 14.5 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * SortLabels.java
19 * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.filters.unsupervised.attribute;
23
24import weka.core.Attribute;
25import weka.core.Capabilities;
26import weka.core.FastVector;
27import weka.core.Instance; 
28import weka.core.DenseInstance;
29import weka.core.Instances;
30import weka.core.Option;
31import weka.core.Range;
32import weka.core.RevisionUtils;
33import weka.core.SelectedTag;
34import weka.core.Tag;
35import weka.core.Utils;
36import weka.core.Capabilities.Capability;
37import weka.filters.SimpleStreamFilter;
38
39import java.io.Serializable;
40import java.util.Collections;
41import java.util.Comparator;
42import java.util.Enumeration;
43import java.util.Vector;
44
/**
 <!-- globalinfo-start -->
 * A simple filter for sorting the labels of nominal attributes.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Turns on output of debugging information.</pre>
 *
 * <pre> -R &lt;index1,index2-index4,...&gt;
 *  Specify list of attributes to process; only nominal ones get sorted.
 *  (default: first-last)</pre>
 *
 * <pre> -V
 *  Inverts the matching sense of the selection.</pre>
 *
 * <pre> -S &lt;CASE|NON-CASE&gt;
 *  Determines the type of sorting:
 *  CASE = Case-sensitive
 *  NON-CASE = Case-insensitive
 *  (default: CASE)</pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 5987 $
 */
75public class SortLabels
76  extends SimpleStreamFilter {
77
78  /** for serialization. */
79  private static final long serialVersionUID = 7815204879694105691L;
80 
81  /**
82   * Represents a case-sensitive comparator for two strings.
83   *
84   * @author  fracpete (fracpete at waikato dot ac dot nz)
85   * @version $Revision: 5987 $
86   */
87  public static class CaseSensitiveComparator
88    implements Comparator, Serializable {
89
90    /** for serialization. */
91    private static final long serialVersionUID = 7071450356783873277L;
92
93    /**
94     * compares the two strings, returns -1 if o1 is smaller than o2, 0
95     * if equal and +1 if greater.
96     *
97     * @param o1        the first string to compare
98     * @param o2        the second string to compare
99     * @return          returns -1 if o1 is smaller than o2, 0 if equal and +1
100     *                  if greater
101     */
102    public int compare(Object o1, Object o2) {
103      String    s1;
104      String    s2;
105     
106      if ((o1 == null) && (o2 == null))
107        return 0;
108      else if (o1 == null)
109        return -1;
110      else if (o2 == null)
111        return +1;
112     
113      s1 = (String) o1;
114      s2 = (String) o2;
115     
116      return s1.compareTo(s2);
117    }
118  }
119 
120  /**
121   * Represents a case-insensitive comparator for two strings.
122   *
123   * @author  fracpete (fracpete at waikato dot ac dot nz)
124   * @version $Revision: 5987 $
125   */
126  public static class CaseInsensitiveComparator
127    implements Comparator, Serializable {
128
129    /** for serialization. */
130    private static final long serialVersionUID = -4515292733342486066L;
131
132    /**
133     * compares the two strings, returns -1 if o1 is smaller than o2, 0
134     * if equal and +1 if greater.
135     *
136     * @param o1        the first string to compare
137     * @param o2        the second string to compare
138     * @return          returns -1 if o1 is smaller than o2, 0 if equal and +1
139     *                  if greater
140     */
141    public int compare(Object o1, Object o2) {
142      String    s1;
143      String    s2;
144     
145      if ((o1 == null) && (o2 == null))
146        return 0;
147      else if (o1 == null)
148        return -1;
149      else if (o2 == null)
150        return +1;
151     
152      s1 = (String) o1;
153      s2 = (String) o2;
154     
155      return s1.toLowerCase().compareTo(s2.toLowerCase());
156    }
157  }
158 
159  /** sorts the strings case-sensitive. */
160  public final static int SORT_CASESENSITIVE = 0;
161 
162  /** sorts the strings case-insensitive. */
163  public final static int SORT_CASEINSENSITIVE = 1;
164 
165  /** Tag allowing selection of sort type. */
166  public final static Tag[] TAGS_SORTTYPE = {
167    new Tag(SORT_CASESENSITIVE, "case", "Case-sensitive"),
168    new Tag(SORT_CASEINSENSITIVE, "non-case", "Case-insensitive")
169  };
170 
171  /** the range of attributes to process (only relational ones will be processed). */
172  protected Range m_AttributeIndices = new Range("first-last");
173
174  /** the new order for the labels. */
175  protected int[][] m_NewOrder = null;
176
177  /** the sort type. */
178  protected int m_SortType = SORT_CASEINSENSITIVE;
179 
180  /** the comparator to use for sorting. */
181  protected Comparator m_Comparator = new CaseSensitiveComparator();
182 
183  /**
184   * Returns a string describing this filter.
185   *
186   * @return            a description of the filter suitable for
187   *                    displaying in the explorer/experimenter gui
188   */
189  public String globalInfo() {
190    return "A simple filter for sorting the labels of nominal attributes.";
191  }
192
193  /**
194   * Returns an enumeration describing the available options.
195   *
196   * @return            an enumeration of all the available options.
197   */
198  public Enumeration listOptions() {
199    Vector              result;
200    Enumeration         en;
201    String              desc;
202    int                 i;
203    SelectedTag         tag;
204
205    result = new Vector();
206
207    en = super.listOptions();
208    while (en.hasMoreElements())
209      result.addElement(en.nextElement());
210
211    result.addElement(new Option(
212        "\tSpecify list of string attributes to convert to words.\n"
213        + "\t(default: select all relational attributes)",
214        "R", 1, "-R <index1,index2-index4,...>"));
215
216    result.addElement(new Option(
217        "\tInverts the matching sense of the selection.",
218        "V", 0, "-V"));
219
220    desc  = "";
221    for (i = 0; i < TAGS_SORTTYPE.length; i++) {
222      tag = new SelectedTag(TAGS_SORTTYPE[i].getID(), TAGS_SORTTYPE);
223      desc  +=   "\t" + tag.getSelectedTag().getIDStr() 
224               + " = " + tag.getSelectedTag().getReadable()
225               + "\n";
226    }
227    result.addElement(new Option(
228        "\tDetermines the type of sorting:\n"
229        + desc
230        + "\t(default: " + new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE) + ")",
231        "S", 1, "-S " + Tag.toOptionList(TAGS_SORTTYPE)));
232
233    return result.elements();
234  }
235
236  /**
237   * Parses the options for this object. <p/>
238   *
239   <!-- options-start -->
240   * Valid options are: <p/>
241   *
242   * <pre> -D
243   *  Turns on output of debugging information.</pre>
244   *
245   * <pre> -R &lt;index1,index2-index4,...&gt;
246   *  Specify list of string attributes to convert to words.
247   *  (default: select all relational attributes)</pre>
248   *
249   * <pre> -V
250   *  Inverts the matching sense of the selection.</pre>
251   *
252   * <pre> -S &lt;CASE|NON-CASE&gt;
253   *  Determines the type of sorting:
254   *  CASE = Case-sensitive
255   *  NON-CASE = Case-insensitive
256   *  (default: CASE)</pre>
257   *
258   <!-- options-end -->
259   *
260   * @param options     the options to use
261   * @throws Exception  if setting of options fails
262   */
263  public void setOptions(String[] options) throws Exception {
264    String      tmpStr;
265
266    tmpStr = Utils.getOption('R', options);
267    if (tmpStr.length() != 0)
268      setAttributeIndices(tmpStr);
269    else
270      setAttributeIndices("first-last");
271
272    setInvertSelection(Utils.getFlag('V', options));
273
274    tmpStr = Utils.getOption('S', options);
275    if (tmpStr.length() != 0)
276      setSortType(new SelectedTag(tmpStr, TAGS_SORTTYPE));
277    else
278      setSortType(new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE));
279
280    super.setOptions(options);
281  }
282
283  /**
284   * Gets the current settings of the classifier.
285   *
286   * @return            an array of strings suitable for passing to setOptions
287   */
288  public String[] getOptions() {
289    int                 i;
290    Vector<String>      result;
291    String[]            options;
292
293    result = new Vector<String>();
294
295    options = super.getOptions();
296    for (i = 0; i < options.length; i++)
297      result.add(options[i]);
298
299    result.add("-R"); 
300    result.add(getAttributeIndices().getRanges());
301
302    if (getInvertSelection())
303      result.add("-V");
304
305    result.add("-S");
306    result.add("" + getSortType());
307   
308    return result.toArray(new String[result.size()]);     
309  }
310
311  /**
312   * Returns the tip text for this property.
313   *
314   * @return            tip text for this property suitable for
315   *                    displaying in the explorer/experimenter gui
316   */
317  public String attributeIndicesTipText() {
318    return 
319        "Specify range of attributes to act on; "
320      + "this is a comma separated list of attribute indices, with "
321      + "\"first\" and \"last\" valid values; Specify an inclusive "
322      + "range with \"-\"; eg: \"first-3,5,6-10,last\".";
323  }
324 
325  /**
326   * Set the range of attributes to process.
327   *
328   * @param value       the new range.
329   */
330  public void setAttributeIndices(String value) {
331    m_AttributeIndices = new Range(value);
332  }
333
334  /**
335   * Gets the current selected attributes.
336   *
337   * @return            current selection.
338   */
339  public Range getAttributeIndices() {
340    return m_AttributeIndices;
341  }
342
343  /**
344   * Returns the tip text for this property.
345   *
346   * @return            tip text for this property suitable for
347   *                    displaying in the explorer/experimenter gui
348   */
349  public String invertSelectionTipText() {
350    return 
351        "Set attribute selection mode. If false, only selected "
352      + "attributes in the range will be worked on; if "
353      + "true, only non-selected attributes will be processed.";
354  }
355
356  /**
357   * Sets whether selected columns should be processed or skipped.
358   *
359   * @param value       the new invert setting
360   */
361  public void setInvertSelection(boolean value) {
362    m_AttributeIndices.setInvert(value);
363  }
364
365  /**
366   * Gets whether the supplied columns are to be processed or skipped.
367   *
368   * @return            true if the supplied columns will be kept
369   */
370  public boolean getInvertSelection() {
371    return m_AttributeIndices.getInvert();
372  }
373
374  /**
375   * Returns the tip text for this property.
376   *
377   * @return            tip text for this property suitable for
378   *                    displaying in the explorer/experimenter gui
379   */
380  public String sortTypeTipText() {
381    return "The type of sorting to use.";
382  }
383
384  /**
385   * Sets the sort type to be used.
386   *
387   * @param type        the type of sorting
388   */
389  public void setSortType(SelectedTag type) {
390    if (type.getTags() == TAGS_SORTTYPE) {
391      m_SortType = type.getSelectedTag().getID();
392     
393      if (m_SortType == SORT_CASESENSITIVE)
394        m_Comparator = new CaseSensitiveComparator();
395      else if (m_SortType == SORT_CASEINSENSITIVE)
396        m_Comparator = new CaseInsensitiveComparator();
397      else
398        throw new IllegalStateException("Unhandled sort type '" + type + "'!");
399    }
400  }
401
402  /**
403   * Gets the sort type to be used.
404   *
405   * @return            the sort type
406   */
407  public SelectedTag getSortType() {
408    return new SelectedTag(m_SortType, TAGS_SORTTYPE);
409  }
410
411  /**
412   * Returns the Capabilities of this filter.
413   *
414   * @return            the capabilities of this object
415   * @see               Capabilities
416   */
417  public Capabilities getCapabilities() {
418    Capabilities result = super.getCapabilities();
419    result.disableAll();
420
421    // attributes
422    result.enableAllAttributes();
423    result.enable(Capability.MISSING_VALUES);
424   
425    // class
426    result.enableAllClasses();
427    result.enable(Capability.MISSING_CLASS_VALUES);
428    result.enable(Capability.NO_CLASS);
429   
430    return result;
431  }
432 
433  /**
434   * Determines the output format based on the input format and returns
435   * this.
436   *
437   * @param inputFormat     the input format to base the output format on
438   * @return                the output format
439   * @throws Exception      in case the determination goes wrong
440   */
441  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
442    Instances           result;
443    Attribute           att;
444    Attribute           attSorted;
445    FastVector          atts;
446    FastVector          values;
447    Vector<String>      sorted;
448    int                 i;
449    int                 n;
450   
451    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);
452   
453    // determine sorted indices
454    atts       = new FastVector();
455    m_NewOrder = new int[inputFormat.numAttributes()][];
456    for (i = 0; i < inputFormat.numAttributes(); i++) {
457      att = inputFormat.attribute(i);
458      if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) {
459        m_NewOrder[i] = new int[0];
460        atts.addElement(inputFormat.attribute(i).copy());
461        continue;
462      }
463
464      // sort labels
465      sorted = new Vector<String>();
466      for (n = 0; n < att.numValues(); n++)
467        sorted.add(att.value(n));
468      Collections.sort(sorted, m_Comparator);
469     
470      // determine new indices
471      m_NewOrder[i] = new int[att.numValues()];
472      values        = new FastVector();
473      for (n = 0; n < att.numValues(); n++) {
474        m_NewOrder[i][n] = sorted.indexOf(att.value(n));
475        values.addElement(sorted.get(n));
476      }
477      attSorted = new Attribute(att.name(), values);
478      attSorted.setWeight(att.weight());
479      atts.addElement(attSorted);
480    }
481   
482    // generate new header
483    result = new Instances(inputFormat.relationName(), atts, 0);
484    result.setClassIndex(inputFormat.classIndex());
485   
486    return result;
487  }
488
489  /**
490   * processes the given instance (may change the provided instance) and
491   * returns the modified version.
492   *
493   * @param instance    the instance to process
494   * @return            the modified data
495   * @throws Exception  in case the processing goes wrong
496   */
497  protected Instance process(Instance instance) throws Exception {
498    Instance    result;
499    Attribute   att;
500    double[]    values;
501    int         i;
502
503    // adjust indices
504    values = new double[instance.numAttributes()];
505    for (i = 0; i < instance.numAttributes(); i++) {
506      att = instance.attribute(i);
507      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
508        values[i] = instance.value(i);
509      else
510        values[i] = m_NewOrder[i][(int) instance.value(i)];
511    }
512
513    // create new instance
514    result = new DenseInstance(instance.weight(), values);
515   
516    return result;
517  }
518 
519  /**
520   * Returns the revision string.
521   *
522   * @return            the revision
523   */
524  public String getRevision() {
525    return RevisionUtils.extract("$Revision: 5987 $");
526  }
527
528  /**
529   * runs the filter with the given arguments.
530   *
531   * @param args      the commandline arguments
532   */
533  public static void main(String[] args) {
534    runFilter(new SortLabels(), args);
535  }
536}
Note: See TracBrowser for help on using the repository browser.