source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/PartitionedMultiFilter.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 19.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * PartitionedMultiFilter.java
19 * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Attribute;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.Range;
34import weka.core.RevisionUtils;
35import weka.core.SparseInstance;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.filters.AllFilter;
39import weka.filters.Filter;
40import weka.filters.SimpleBatchFilter;
41
42import java.util.Enumeration;
43import java.util.Vector;
44
45/**
46 <!-- globalinfo-start -->
47 * A filter that applies filters on subsets of attributes and assembles the output into a new dataset. Attributes that are not covered by any of the ranges can be either retained or removed from the output.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 <!-- options-start -->
52 * Valid options are: <p/>
53 *
54 * <pre> -D
55 *  Turns on output of debugging information.</pre>
56 *
57 * <pre> -F &lt;classname [options]&gt;
58 *  A filter to apply (can be specified multiple times).</pre>
59 *
60 * <pre> -R &lt;range&gt;
61 *  An attribute range (can be specified multiple times).
62 *  For each filter a range must be supplied. 'first' and 'last'
63 *  are valid indices.</pre>
64 *
65 * <pre> -U
66 *  Flag for leaving unused attributes out of the output, by default
67 *  these are included in the filter output.</pre>
68 *
69 <!-- options-end -->
70 *
71 * @author  FracPete (fracpete at waikato dot ac dot nz)
72 * @version $Revision: 5987 $
73 * @see     weka.filters.StreamableFilter
74 */
75public class PartitionedMultiFilter
76  extends SimpleBatchFilter {
77
78  /** for serialization */
79  private static final long serialVersionUID = -6293720886005713120L;
80
81  /** The filters */
82  protected Filter m_Filters[] = {new AllFilter()};
83 
84  /** The attribute ranges */
85  protected Range m_Ranges[] = {new Range("first-last")};
86 
87  /** Whether unused attributes are left out of the output */
88  protected boolean m_RemoveUnused = false;
89 
90  /** the indices of the unused attributes */
91  protected int[] m_IndicesUnused = new int[0];
92 
93  /**
94   * Returns a string describing this filter
95   * @return            a description of the filter suitable for
96   *                    displaying in the explorer/experimenter gui
97   */
98  public String globalInfo() {
99    return 
100        "A filter that applies filters on subsets of attributes and "
101      + "assembles the output into a new dataset. Attributes that are "
102      + "not covered by any of the ranges can be either retained or removed "
103      + "from the output.";
104  }
105
106  /**
107   * Returns an enumeration describing the available options.
108   *
109   * @return            an enumeration of all the available options.
110   */
111  public Enumeration listOptions() {
112    Vector result = new Vector();
113    Enumeration enm = super.listOptions();
114    while (enm.hasMoreElements())
115      result.add(enm.nextElement());
116     
117    result.addElement(new Option(
118        "\tA filter to apply (can be specified multiple times).",
119        "F", 1, "-F <classname [options]>"));
120
121    result.addElement(new Option(
122        "\tAn attribute range (can be specified multiple times).\n"
123        + "\tFor each filter a range must be supplied. 'first' and 'last'\n"
124        + "\tare valid indices.",
125        "R", 1, "-R <range>"));
126
127    result.addElement(new Option(
128        "\tFlag for leaving unused attributes out of the output, by default\n"
129        + "\tthese are included in the filter output.",
130        "U", 0, "-U"));
131
132    return result.elements();
133  }
134
135  /**
136   * Parses a list of options for this object. <p/>
137   *
138   <!-- options-start -->
139   * Valid options are: <p/>
140   *
141   * <pre> -D
142   *  Turns on output of debugging information.</pre>
143   *
144   * <pre> -F &lt;classname [options]&gt;
145   *  A filter to apply (can be specified multiple times).</pre>
146   *
147   * <pre> -R &lt;range&gt;
148   *  An attribute range (can be specified multiple times).
149   *  For each filter a range must be supplied. 'first' and 'last'
150   *  are valid indices.</pre>
151   *
152   * <pre> -U
153   *  Flag for leaving unused attributes out of the output, by default
154   *  these are included in the filter output.</pre>
155   *
156   <!-- options-end -->
157   *
158   * @param options     the list of options as an array of strings
159   * @throws Exception  if an option is not supported
160   */
161  public void setOptions(String[] options) throws Exception {
162    String        tmpStr;
163    String        classname;
164    String[]      options2;
165    Vector        objects;
166
167    super.setOptions(options);
168   
169    setRemoveUnused(Utils.getFlag("U", options));
170   
171    objects = new Vector();
172    while ((tmpStr = Utils.getOption("F", options)).length() != 0) {
173      options2    = Utils.splitOptions(tmpStr);
174      classname      = options2[0];
175      options2[0] = "";
176      objects.add(Utils.forName(Filter.class, classname, options2));
177    }
178
179    // at least one filter
180    if (objects.size() == 0)
181      objects.add(new AllFilter());
182
183    setFilters((Filter[]) objects.toArray(new Filter[objects.size()]));
184   
185    objects = new Vector();
186    while ((tmpStr = Utils.getOption("R", options)).length() != 0) {
187      objects.add(new Range(tmpStr));
188    }
189
190    // at least one Range
191    if (objects.size() == 0)
192      objects.add(new Range("first-last"));
193
194    setRanges((Range[]) objects.toArray(new Range[objects.size()]));
195   
196    // is number of filters the same as ranges?
197    checkDimensions();
198  }
199
200  /**
201   * Gets the current settings of the filter.
202   *
203   * @return            an array of strings suitable for passing to setOptions
204   */
205  public String[] getOptions() {
206    Vector      result;
207    String[]    options;
208    int         i;
209
210    result = new Vector();
211
212    options = super.getOptions();
213    for (i = 0; i < options.length; i++)
214      result.add(options[i]);
215   
216    if (getRemoveUnused())
217      result.add("-U");
218   
219    for (i = 0; i < getFilters().length; i++) {
220      result.add("-F");
221      result.add(getFilterSpec(getFilter(i)));
222    }
223
224    for (i = 0; i < getRanges().length; i++) {
225      result.add("-R");
226      result.add("" + getRange(i).getRanges());
227    }
228
229    return (String[]) result.toArray(new String[result.size()]);
230  }
231
232  /**
233   * checks whether the dimensions of filters and ranges fit together
234   *
235   * @throws Exception  if dimensions differ
236   */
237  protected void checkDimensions() throws Exception {
238    if (getFilters().length != getRanges().length)
239      throw new IllegalArgumentException(
240          "Number of filters (= " + getFilters().length + ") "
241          + "and ranges (= " + getRanges().length + ") don't match!");
242  }
243 
244  /**
245   * Returns the Capabilities of this filter.
246   *
247   * @return            the capabilities of this object
248   * @see               Capabilities
249   */
250  public Capabilities getCapabilities() {
251    Capabilities        result;
252   
253    if (getFilters().length == 0) {
254      result = super.getCapabilities();
255      result.disableAll();
256    } else {
257      result = getFilters()[0].getCapabilities();
258    }
259   
260    // disable attributes
261    result.disable(Capability.STRING_ATTRIBUTES);
262    result.disableDependency(Capability.STRING_ATTRIBUTES);
263    result.disable(Capability.RELATIONAL_ATTRIBUTES);
264    result.disableDependency(Capability.RELATIONAL_ATTRIBUTES);
265   
266    return result;
267  }
268
269  /**
270   * Sets whether unused attributes (ones that are not covered by any of the
271   * ranges) are removed from the output.
272   *
273   * @param value       if true then the unused attributes get removed
274   */
275  public void setRemoveUnused(boolean value) {
276    m_RemoveUnused = value;
277  }
278 
279  /**
280   * Gets whether unused attributes (ones that are not covered by any of the
281   * ranges) are removed from the output.
282   *
283   * @return            true if unused attributes are removed
284   */
285  public boolean getRemoveUnused() {
286    return m_RemoveUnused;
287  }
288 
289  /**
290   * Returns the tip text for this property
291   *
292   * @return            tip text for this property suitable for
293   *                    displaying in the explorer/experimenter gui
294   */
295  public String removeUnusedTipText() {
296    return 
297        "If true then unused attributes (ones that are not covered by any "
298      + "of the ranges) will be removed from the output.";
299  }
300 
301  /**
302   * Sets the list of possible filters to choose from.
303   * Also resets the state of the filter (this reset doesn't affect the
304   * options).
305   *
306   * @param filters     an array of filters with all options set.
307   * @see #reset()
308   */
309  public void setFilters(Filter[] filters) {
310    m_Filters = filters;
311    reset();
312  }
313
314  /**
315   * Gets the list of possible filters to choose from.
316   *
317   * @return            the array of Filters
318   */
319  public Filter[] getFilters() {
320    return m_Filters;
321  }
322 
323  /**
324   * Returns the tip text for this property
325   *
326   * @return            tip text for this property suitable for
327   *                    displaying in the explorer/experimenter gui
328   */
329  public String filtersTipText() {
330    return "The base filters to be used.";
331  }
332 
333  /**
334   * Gets a single filter from the set of available filters.
335   *
336   * @param index       the index of the filter wanted
337   * @return            the Filter
338   */
339  public Filter getFilter(int index) {
340    return m_Filters[index];
341  }
342
343  /**
344   * returns the filter classname and the options as one string
345   *
346   * @param filter      the filter to get the specs for
347   * @return            the classname plus options
348   */
349  protected String getFilterSpec(Filter filter) {
350    String        result;
351
352    if (filter == null) {
353      result = "";
354    }
355    else {
356      result  = filter.getClass().getName();
357      if (filter instanceof OptionHandler)
358        result += " " 
359          + Utils.joinOptions(((OptionHandler) filter).getOptions());
360    }
361
362    return result;
363  }
364
365  /**
366   * Sets the list of possible Ranges to choose from.
367   * Also resets the state of the Range (this reset doesn't affect the
368   * options).
369   *
370   * @param Ranges      an array of Ranges with all options set.
371   * @see #reset()
372   */
373  public void setRanges(Range[] Ranges) {
374    m_Ranges = Ranges;
375    reset();
376  }
377
378  /**
379   * Gets the list of possible Ranges to choose from.
380   *
381   * @return            the array of Ranges
382   */
383  public Range[] getRanges() {
384    return m_Ranges;
385  }
386 
387  /**
388   * Returns the tip text for this property
389   *
390   * @return            tip text for this property suitable for
391   *                    displaying in the explorer/experimenter gui
392   */
393  public String rangesTipText() {
394    return "The attribute ranges to be used.";
395  }
396 
397  /**
398   * Gets a single Range from the set of available Ranges.
399   *
400   * @param index       the index of the Range wanted
401   * @return            the Range
402   */
403  public Range getRange(int index) {
404    return m_Ranges[index];
405  }
406 
407  /**
408   * determines the indices of unused attributes (ones that are not covered
409   * by any of the range)
410   *
411   * @param data        the data to base the determination on
412   * @see               #m_IndicesUnused
413   */
414  protected void determineUnusedIndices(Instances data) {
415    Vector<Integer>     indices;
416    int                 i;
417    int                 n;
418    boolean             covered;
419   
420    // traverse all ranges
421    indices = new Vector<Integer>();
422    for (i = 0; i < data.numAttributes(); i++) {
423      if (i == data.classIndex())
424        continue;
425     
426      covered = false;
427      for (n = 0; n < getRanges().length; n++) {
428        if (getRanges()[n].isInRange(i)) {
429          covered = true;
430          break;
431        }
432      }
433     
434      if (!covered)
435        indices.add(new Integer(i));
436    }
437   
438    // create array
439    m_IndicesUnused = new int[indices.size()];
440    for (i = 0; i < indices.size(); i++)
441      m_IndicesUnused[i] = indices.get(i).intValue();
442   
443    if (getDebug())
444      System.out.println(
445          "Unused indices: " + Utils.arrayToString(m_IndicesUnused));
446  }
447 
448  /**
449   * generates a subset of the dataset with only the attributes from the range
450   * (class is always added if present)
451   *
452   * @param data        the data to work on
453   * @param range       the range of attribute to use
454   * @return            the generated subset
455   * @throws Exception  if creation fails
456   */
457  protected Instances generateSubset(Instances data, Range range) throws Exception {
458    Remove      filter;
459    String      atts;
460    Instances   result;
461 
462    // determine attributes
463    atts = range.getRanges();
464    if ((data.classIndex() > -1) && (!range.isInRange(data.classIndex())))
465      atts += "," + (data.classIndex() + 1);
466   
467    // setup filter
468    filter = new Remove();
469    filter.setAttributeIndices(atts);
470    filter.setInvertSelection(true);
471    filter.setInputFormat(data);
472   
473    // generate output
474    result = Filter.useFilter(data, filter);
475   
476    return result;
477  }
478 
479  /**
480   * renames all the attributes in the dataset (excluding the class if present)
481   * by adding the prefix to the name.
482   *
483   * @param data        the data to work on
484   * @param prefix      the prefix for the attributes
485   * @return            a copy of the data with the attributes renamed
486   * @throws Exception  if renaming fails
487   */
488  protected Instances renameAttributes(Instances data, String prefix) throws Exception {
489    Instances   result;
490    int         i;
491    FastVector  atts;
492   
493    // rename attributes
494    atts = new FastVector();
495    for (i = 0; i < data.numAttributes(); i++) {
496      if (i == data.classIndex())
497        atts.addElement(data.attribute(i).copy());
498      else
499        atts.addElement(data.attribute(i).copy(prefix + data.attribute(i).name()));
500    }
501   
502    // create new dataset
503    result = new Instances(data.relationName(), atts, data.numInstances());
504    for (i = 0; i < data.numInstances(); i++) {
505      result.add((Instance) data.instance(i).copy());
506    }
507   
508    // set class if present
509    if (data.classIndex() > -1)
510      result.setClassIndex(data.classIndex());
511   
512    return result;
513  }
514 
515  /**
516   * Determines the output format based only on the full input dataset and
517   * returns this otherwise null is returned. In case the output format cannot
518   * be returned immediately, i.e., immediateOutputFormat() returns false,
519   * then this method will be called from batchFinished().
520   *
521   * @param inputFormat     the input format to base the output format on
522   * @return                the output format
523   * @throws Exception      in case the determination goes wrong
524   * @see                   #hasImmediateOutputFormat()
525   * @see                   #batchFinished()
526   */
527  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
528    Instances   result;
529    Instances   processed;
530    int         i;
531    int         n;
532    FastVector  atts;
533    Attribute   att;
534   
535    if (!isFirstBatchDone()) {
536      // we need the full dataset here, see process(Instances)
537      if (inputFormat.numInstances() == 0)
538        return null;
539
540      checkDimensions();
541
542      // determine unused indices
543      determineUnusedIndices(inputFormat);
544
545      atts = new FastVector();
546      for (i = 0; i < getFilters().length; i++) {
547        if (!isFirstBatchDone()) {
548          // generate subset
549          processed = generateSubset(inputFormat, getRange(i));
550          // set input format
551          if (!getFilter(i).setInputFormat(processed))
552            Filter.useFilter(processed, getFilter(i));
553        }
554
555        // get output format
556        processed = getFilter(i).getOutputFormat();
557
558        // rename attributes
559        processed = renameAttributes(processed, "filtered-" + i + "-");
560
561        // add attributes
562        for (n = 0; n < processed.numAttributes(); n++) {
563          if (n == processed.classIndex())
564            continue;
565          atts.addElement(processed.attribute(n).copy());
566        }
567      }
568
569      // add unused attributes
570      if (!getRemoveUnused()) {
571        for (i = 0; i < m_IndicesUnused.length; i++) {
572          att = inputFormat.attribute(m_IndicesUnused[i]);
573          atts.addElement(att.copy("unfiltered-" + att.name()));
574        }
575      }
576
577      // add class if present
578      if (inputFormat.classIndex() > -1)
579        atts.addElement(inputFormat.classAttribute().copy());
580
581      // generate new dataset
582      result = new Instances(inputFormat.relationName(), atts, 0);
583      if (inputFormat.classIndex() > -1)
584        result.setClassIndex(result.numAttributes() - 1);
585    }
586    else {
587      result = getOutputFormat();
588    }
589   
590    return result;
591  }
592
593  /**
594   * Processes the given data (may change the provided dataset) and returns
595   * the modified version. This method is called in batchFinished().
596   *
597   * @param instances   the data to process
598   * @return            the modified data
599   * @throws Exception  in case the processing goes wrong
600   * @see               #batchFinished()
601   */
602  protected Instances process(Instances instances) throws Exception {
603    Instances           result;
604    int                 i;
605    int                 n;
606    int                 m;
607    int                 index;
608    Instances[]         processed;
609    Instance            inst;
610    Instance            newInst;
611    double[]            values;
612    Vector              errors;
613
614    if (!isFirstBatchDone()) {
615      checkDimensions();
616
617      // set upper limits
618      for (i = 0; i < m_Ranges.length; i++)
619        m_Ranges[i].setUpper(instances.numAttributes() - 1);
620
621      // determine unused indices
622      determineUnusedIndices(instances);
623    }
624
625    // pass data through all filters
626    processed = new Instances[getFilters().length];
627    for (i = 0; i < getFilters().length; i++) {
628      processed[i] = generateSubset(instances, getRange(i));
629      if (!isFirstBatchDone())
630        getFilter(i).setInputFormat(processed[i]);
631      processed[i] = Filter.useFilter(processed[i], getFilter(i));
632    }
633
634    // set output format (can only be determined with full dataset, hence here)
635    if (!isFirstBatchDone()) {
636      result = determineOutputFormat(instances);
637      setOutputFormat(result);
638    }
639    else {
640      result = getOutputFormat();
641    }
642   
643    // check whether all filters didn't change the number of instances
644    errors = new Vector();
645    for (i = 0; i < processed.length; i++) {
646      if (processed[i].numInstances() != instances.numInstances())
647        errors.add(new Integer(i));
648    }
649    if (errors.size() > 0)
650      throw new IllegalStateException(
651          "The following filter(s) changed the number of instances: " + errors);
652   
653    // assemble data
654    for (i = 0; i < instances.numInstances(); i++) {
655      inst   = instances.instance(i);
656      values = new double[result.numAttributes()];
657
658      // filtered data
659      index = 0;
660      for (n = 0; n < processed.length; n++) {
661        for (m = 0; m < processed[n].numAttributes(); m++) {
662          if (m == processed[n].classIndex())
663            continue;
664          values[index] = processed[n].instance(i).value(m);
665          index++;
666        }
667      }
668     
669      // unused attributes
670      if (!getRemoveUnused()) {
671        for (n = 0; n < m_IndicesUnused.length; n++) {
672          values[index] = inst.value(m_IndicesUnused[n]);
673          index++;
674        }
675      }
676     
677      // class
678      if (instances.classIndex() > -1)
679        values[values.length - 1] = inst.value(instances.classIndex());
680
681      // generate and add instance
682      if (inst instanceof SparseInstance)
683        newInst = new SparseInstance(instances.instance(i).weight(), values);
684      else
685        newInst = new DenseInstance(instances.instance(i).weight(), values);
686      result.add(newInst);
687    }
688   
689    return result;
690  }
691 
692  /**
693   * Returns the revision string.
694   *
695   * @return            the revision
696   */
697  public String getRevision() {
698    return RevisionUtils.extract("$Revision: 5987 $");
699  }
700
701  /**
702   * Main method for executing this class.
703   *
704   * @param args should contain arguments for the filter: use -h for help
705   */
706  public static void main(String[] args) {
707    runFilter(new PartitionedMultiFilter(), args);
708  }
709}
Note: See TracBrowser for help on using the repository browser.