source: tags/MetisMQIDemo/src/main/java/weka/filters/unsupervised/attribute/PrincipalComponents.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Tagged the version for the demo and added a branch.

File size: 23.6 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * PrincipalComponents.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.matrix.EigenvalueDecomposition;
import weka.core.matrix.Matrix;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Performs a principal components analysis and transformation of the data.<br/>
 * Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data -- default 0.95 (95%).<br/>
 * Based on code of the attribute selection scheme 'PrincipalComponents' by Mark Hall and Gabi Schmidberger.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Don't normalize input data.</pre>
 *
 * <pre> -R &lt;num&gt;
 *  Retain enough PC attributes to account
 *  for this proportion of variance in the original data.
 *  (default: 0.95)</pre>
 *
 * <pre> -A &lt;num&gt;
 *  Maximum number of attributes to include in
 *  transformed attribute names.
 *  (-1 = include all, default: 5)</pre>
 *
 * <pre> -M &lt;num&gt;
 *  Maximum number of PC attributes to retain.
 *  (-1 = include all, default: -1)</pre>
 *
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz) -- attribute selection code
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) -- attribute selection code
 * @author fracpete (fracpete at waikato dot ac dot nz) -- filter code
 * @version $Revision: 5987 $
 */
public class PrincipalComponents
  extends Filter
  implements OptionHandler, UnsupervisedFilter {

  /** for serialization. */
  private static final long serialVersionUID = 4626939780964387784L;

  /** The data to analyse/transform. */
  protected Instances m_TrainInstances;

  /** Keep a copy for the class attribute (if set). */
  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */
  protected Instances m_TransformedFormat;

  /** Data has a class set. */
  protected boolean m_HasClass;

  /** Class index. */
  protected int m_ClassIndex;

  /** Number of attributes. */
  protected int m_NumAttribs;

  /** Number of instances. */
  protected int m_NumInstances;

  /** Correlation matrix for the original data. */
  protected double[][] m_Correlation;

  /** Will hold the unordered linear transformations of the (normalized)
      original data. */
  protected double[][] m_Eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors. */
  protected double[] m_Eigenvalues = null;

  /** Indices of the eigenvalues, sorted in ascending order. */
  protected int[] m_SortedEigens;

  /** sum of the eigenvalues. */
  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */
  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for normalizing the data. */
  protected Normalize m_NormalizeFilter;

  /** Filter for turning nominal values into numeric ones. */
  protected NominalToBinary m_NominalToBinaryFilter;

  /** Filter for removing the class attribute and attributes with at most one distinct value. */
  protected Remove m_AttributeFilter;

  /** The number of attributes in the pc transformed data. */
  protected int m_OutputNumAtts = -1;

  /** normalize the input data? */
  protected boolean m_Normalize = true;

  /** the amount of variance to cover in the original data when
      retaining the best n PC's. */
  protected double m_CoverVariance = 0.95;

  /** maximum number of attributes in the transformed attribute name. */
  protected int m_MaxAttrsInName = 5;

  /** maximum number of attributes in the transformed data (-1 for all). */
  protected int m_MaxAttributes = -1;

  /**
   * Returns a string describing this filter.
   *
   * @return            a description of the filter suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Performs a principal components analysis and transformation of "
      + "the data.\n"
      + "Dimensionality reduction is accomplished by choosing enough eigenvectors "
      + "to account for some percentage of the variance in the original data -- "
      + "default 0.95 (95%).\n"
      + "Based on code of the attribute selection scheme 'PrincipalComponents' "
      + "by Mark Hall and Gabi Schmidberger.";
  }
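
  // Minimal usage sketch (added for illustration; "data" is a placeholder for
  // an already-loaded weka.core.Instances object). The filter is driven
  // through the standard Filter API that setup() below also relies on:
  //
  //   PrincipalComponents pc = new PrincipalComponents();
  //   pc.setVarianceCovered(0.95);   // keep PCs covering 95% of the variance
  //   pc.setMaximumAttributes(-1);   // -1 = no hard cap on retained PCs
  //   pc.setInputFormat(data);
  //   Instances reduced = Filter.useFilter(data, pc);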

  /**
   * Returns an enumeration describing the available options.
   *
   * @return            an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    result.addElement(new Option(
        "\tDon't normalize input data.",
        "D", 0, "-D"));

    result.addElement(new Option(
        "\tRetain enough PC attributes to account\n"
        + "\tfor this proportion of variance in the original data.\n"
        + "\t(default: 0.95)",
        "R", 1, "-R <num>"));

    result.addElement(new Option(
        "\tMaximum number of attributes to include in\n"
        + "\ttransformed attribute names.\n"
        + "\t(-1 = include all, default: 5)",
        "A", 1, "-A <num>"));

    result.addElement(new Option(
        "\tMaximum number of PC attributes to retain.\n"
        + "\t(-1 = include all, default: -1)",
        "M", 1, "-M <num>"));

    return result.elements();
  }

  /**
   * Parses a list of options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  Don't normalize input data.</pre>
   *
   * <pre> -R &lt;num&gt;
   *  Retain enough PC attributes to account
   *  for this proportion of variance in the original data.
   *  (default: 0.95)</pre>
   *
   * <pre> -A &lt;num&gt;
   *  Maximum number of attributes to include in
   *  transformed attribute names.
   *  (-1 = include all, default: 5)</pre>
   *
   * <pre> -M &lt;num&gt;
   *  Maximum number of PC attributes to retain.
   *  (-1 = include all, default: -1)</pre>
   *
   <!-- options-end -->
   *
   * @param options     the list of options as an array of strings
   * @throws Exception  if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String        tmpStr;

    tmpStr = Utils.getOption('R', options);
    if (tmpStr.length() != 0)
      setVarianceCovered(Double.parseDouble(tmpStr));
    else
      setVarianceCovered(0.95);

    tmpStr = Utils.getOption('A', options);
    if (tmpStr.length() != 0)
      setMaximumAttributeNames(Integer.parseInt(tmpStr));
    else
      setMaximumAttributeNames(5);

    tmpStr = Utils.getOption('M', options);
    if (tmpStr.length() != 0)
      setMaximumAttributes(Integer.parseInt(tmpStr));
    else
      setMaximumAttributes(-1);

    setNormalize(!Utils.getFlag('D', options));
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return            an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector<String>      result;

    result = new Vector<String>();

    result.add("-R");
    result.add("" + getVarianceCovered());

    result.add("-A");
    result.add("" + getMaximumAttributeNames());

    result.add("-M");
    result.add("" + getMaximumAttributes());

    if (!getNormalize())
      result.add("-D");

    return result.toArray(new String[result.size()]);
  }
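
  // Command-line sketch (illustrative, not from the original file): the
  // options above are combined with the generic filter options handled by
  // weka.filters.Filter, e.g. -i/-o for the input and output ARFF files,
  // roughly along the lines of:
  //
  //   java weka.filters.unsupervised.attribute.PrincipalComponents \
  //       -i input.arff -o reduced.arff -R 0.95 -A 5 -M -1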

  /**
   * Returns the tip text for this property.
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String normalizeTipText() {
    return "Normalize input data.";
  }

  /**
   * Set whether input data will be normalized.
   *
   * @param value       true if input data is to be normalized
   */
  public void setNormalize(boolean value) {
    m_Normalize = value;
  }

  /**
   * Gets whether or not input data is to be normalized.
   *
   * @return            true if input data is to be normalized
   */
  public boolean getNormalize() {
    return m_Normalize;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String varianceCoveredTipText() {
    return "Retain enough PC attributes to account for this proportion of variance.";
  }

  /**
   * Sets the amount of variance to account for when retaining
   * principal components.
   *
   * @param value       the proportion of total variance to account for
   */
  public void setVarianceCovered(double value) {
    m_CoverVariance = value;
  }

  /**
   * Gets the proportion of total variance to account for when
   * retaining principal components.
   *
   * @return            the proportion of variance to account for
   */
  public double getVarianceCovered() {
    return m_CoverVariance;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String maximumAttributeNamesTipText() {
    return "The maximum number of attributes to include in transformed attribute names.";
  }

  /**
   * Sets maximum number of attributes to include in
   * transformed attribute names.
   *
   * @param value       the maximum number of attributes
   */
  public void setMaximumAttributeNames(int value) {
    m_MaxAttrsInName = value;
  }

  /**
   * Gets maximum number of attributes to include in
   * transformed attribute names.
   *
   * @return            the maximum number of attributes
   */
  public int getMaximumAttributeNames() {
    return m_MaxAttrsInName;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String maximumAttributesTipText() {
    return "The maximum number of PC attributes to retain.";
  }

  /**
   * Sets maximum number of PC attributes to retain.
   *
   * @param value       the maximum number of attributes
   */
  public void setMaximumAttributes(int value) {
    m_MaxAttributes = value;
  }

  /**
   * Gets maximum number of PC attributes to retain.
   *
   * @return            the maximum number of attributes
   */
  public int getMaximumAttributes() {
    return m_MaxAttributes;
  }

  /**
   * Returns the capabilities of this filter.
   *
   * @return            the capabilities of this filter
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Determines the output format based on the input format and returns
   * this. In case the output format cannot be returned immediately, i.e.,
   * immediateOutputFormat() returns false, then this method will be called
   * from batchFinished().
   *
   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   * @see   #hasImmediateOutputFormat()
   * @see   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    double              cumulative;
    FastVector          attributes;
    int                 i;
    int                 j;
    StringBuffer        attName;
    double[]            coeff_mags;
    int                 num_attrs;
    int[]               coeff_inds;
    double              coeff_value;
    int                 numAttsLowerBound;

    if (m_Eigenvalues == null)
      return inputFormat;

    if (m_MaxAttributes > 0)
      numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
    else
      numAttsLowerBound = 0;
    if (numAttsLowerBound < 0)
      numAttsLowerBound = 0;

    cumulative = 0.0;
    attributes = new FastVector();
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
      attName = new StringBuffer();
      // build array of coefficients
      coeff_mags = new double[m_NumAttribs];
      for (j = 0; j < m_NumAttribs; j++)
        coeff_mags[j] = -Math.abs(m_Eigenvectors[j][m_SortedEigens[i]]);
      num_attrs = (m_MaxAttrsInName > 0) ? Math.min(m_NumAttribs, m_MaxAttrsInName) : m_NumAttribs;

      // this array contains the sorted indices of the coefficients
      if (m_MaxAttrsInName > 0) {
        // if m_MaxAttrsInName > 0, sort coefficients by decreasing magnitude
        coeff_inds = Utils.sort(coeff_mags);
      }
      else {
        // if m_MaxAttrsInName <= 0, use all coefficients in original order
        coeff_inds = new int[m_NumAttribs];
        for (j = 0; j < m_NumAttribs; j++)
          coeff_inds[j] = j;
      }
      // build final attName string
      for (j = 0; j < num_attrs; j++) {
        coeff_value = m_Eigenvectors[coeff_inds[j]][m_SortedEigens[i]];
        if (j > 0 && coeff_value >= 0)
          attName.append("+");
        attName.append(
            Utils.doubleToString(coeff_value, 5, 3)
            + inputFormat.attribute(coeff_inds[j]).name());
      }
      if (num_attrs < m_NumAttribs)
        attName.append("...");

      attributes.addElement(new Attribute(attName.toString()));
      cumulative += m_Eigenvalues[m_SortedEigens[i]];

      if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
        break;
    }

    if (m_HasClass)
      attributes.addElement(m_TrainCopy.classAttribute().copy());

    Instances outputFormat =
      new Instances(
          m_TrainCopy.relationName() + "_principal components", attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_HasClass)
      outputFormat.setClassIndex(outputFormat.numAttributes() - 1);

    m_OutputNumAtts = outputFormat.numAttributes();

    return outputFormat;
  }
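
  // Illustrative example (hypothetical values): with three dominant inputs
  // a1, a2, a3 and m_MaxAttrsInName = 3, a component attribute created above
  // might be named "0.573a1+0.541a2-0.409a3...", i.e. the largest-magnitude
  // coefficients paired with their attribute names, followed by "..." whenever
  // further attributes are omitted from the name.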

  /**
   * Fill the correlation matrix.
   */
  protected void fillCorrelation() {
    int         i;
    int         j;
    int         k;
    double[]    att1;
    double[]    att2;
    double      corr;

    m_Correlation = new double[m_NumAttribs][m_NumAttribs];
    att1          = new double [m_NumInstances];
    att2          = new double [m_NumInstances];

    for (i = 0; i < m_NumAttribs; i++) {
      for (j = 0; j < m_NumAttribs; j++) {
        if (i == j) {
          m_Correlation[i][j] = 1.0;
        }
        else {
          for (k = 0; k < m_NumInstances; k++) {
            att1[k] = m_TrainInstances.instance(k).value(i);
            att2[k] = m_TrainInstances.instance(k).value(j);
          }
          corr = Utils.correlation(att1, att2, m_NumInstances);
          m_Correlation[i][j] = corr;
          m_Correlation[j][i] = corr;
        }
      }
    }
  }
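
  // Added note: Utils.correlation above computes the correlation of the two
  // attribute columns; assuming the standard Pearson form, this is
  //
  //   r = sum_k (x_k - xbar) * (y_k - ybar)
  //         / sqrt( sum_k (x_k - xbar)^2 * sum_k (y_k - ybar)^2 )
  //
  // so m_Correlation is symmetric with 1.0 on the diagonal, which is the
  // matrix handed to the eigen-decomposition in setup().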

  /**
   * Transform an instance in original (unnormalized) format.
   *
   * @param instance    an instance in the original (unnormalized) format
   * @return            a transformed instance
   * @throws Exception  if the instance can't be transformed
   */
  protected Instance convertInstance(Instance instance) throws Exception {
    Instance    result;
    double[]    newVals;
    Instance    tempInst;
    double      cumulative;
    int         i;
    int         j;
    double      tempval;
    int         numAttsLowerBound;

    newVals  = new double[m_OutputNumAtts];
    tempInst = (Instance) instance.copy();

    m_ReplaceMissingFilter.input(tempInst);
    m_ReplaceMissingFilter.batchFinished();
    tempInst = m_ReplaceMissingFilter.output();

    if (m_Normalize) {
      m_NormalizeFilter.input(tempInst);
      m_NormalizeFilter.batchFinished();
      tempInst = m_NormalizeFilter.output();
    }

    m_NominalToBinaryFilter.input(tempInst);
    m_NominalToBinaryFilter.batchFinished();
    tempInst = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
      m_AttributeFilter.input(tempInst);
      m_AttributeFilter.batchFinished();
      tempInst = m_AttributeFilter.output();
    }

    if (m_HasClass)
      newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());

    if (m_MaxAttributes > 0)
      numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
    else
      numAttsLowerBound = 0;
    if (numAttsLowerBound < 0)
      numAttsLowerBound = 0;

    cumulative = 0;
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
      tempval = 0.0;
      for (j = 0; j < m_NumAttribs; j++)
        tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);

      newVals[m_NumAttribs - i - 1] = tempval;
      cumulative += m_Eigenvalues[m_SortedEigens[i]];
      if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
        break;
    }

    // create instance
    if (instance instanceof SparseInstance)
      result = new SparseInstance(instance.weight(), newVals);
    else
      result = new DenseInstance(instance.weight(), newVals);

    return result;
  }
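
  // Added note on ordering: Utils.sort() in setup() returns eigenvalue indices
  // in ascending order, so the loop above starts at the largest eigenvalue and
  // newVals[m_NumAttribs - i - 1] places the highest-variance principal
  // component at position 0 of the transformed instance.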

  /**
   * Initializes the filter with the given input data.
   *
   * @param instances   the data to process
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  protected void setup(Instances instances) throws Exception {
    int                         i;
    int                         j;
    Vector<Integer>             deleteCols;
    int[]                       todelete;
    double[][]                  v;
    Matrix                      corr;
    EigenvalueDecomposition     eig;
    Matrix                      V;

    m_TrainInstances = new Instances(instances);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_TrainCopy = new Instances(m_TrainInstances, 0);

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

    if (m_Normalize) {
      m_NormalizeFilter = new Normalize();
      m_NormalizeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NormalizeFilter);
    }

    m_NominalToBinaryFilter = new NominalToBinary();
    m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

    // delete any attribute with only one distinct value or whose values are all missing
    deleteCols = new Vector<Integer>();
    for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
      if (m_TrainInstances.numDistinctValues(i) <= 1)
        deleteCols.addElement(i);
    }

    if (m_TrainInstances.classIndex() >= 0) {
      // get rid of the class column
      m_HasClass = true;
      m_ClassIndex = m_TrainInstances.classIndex();
      deleteCols.addElement(new Integer(m_ClassIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
      m_AttributeFilter = new Remove();
      todelete = new int [deleteCols.size()];
      for (i = 0; i < deleteCols.size(); i++)
        todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();
      m_AttributeFilter.setAttributeIndicesArray(todelete);
      m_AttributeFilter.setInvertSelection(false);
      m_AttributeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
    }

    // can the filter handle the processed data? e.g., are there enough attributes?
    getCapabilities().testWithFail(m_TrainInstances);

    m_NumInstances = m_TrainInstances.numInstances();
    m_NumAttribs   = m_TrainInstances.numAttributes();

    fillCorrelation();

    // get eigen vectors/values
    corr = new Matrix(m_Correlation);
    eig  = corr.eig();
    V    = eig.getV();
    v    = new double[m_NumAttribs][m_NumAttribs];
    for (i = 0; i < v.length; i++) {
      for (j = 0; j < v[0].length; j++)
        v[i][j] = V.get(i, j);
    }
    m_Eigenvectors = (double[][]) v.clone();
    m_Eigenvalues  = (double[]) eig.getRealEigenvalues().clone();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (i = 0; i < m_Eigenvalues.length; i++) {
      if (m_Eigenvalues[i] < 0)
        m_Eigenvalues[i] = 0.0;
    }
    m_SortedEigens     = Utils.sort(m_Eigenvalues);
    m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

    m_TransformedFormat = determineOutputFormat(m_TrainInstances);
    setOutputFormat(m_TransformedFormat);

    m_TrainInstances = null;
  }
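
  // Added summary: the preprocessing pipeline built here (and replayed per
  // instance in convertInstance()) is ReplaceMissingValues -> Normalize
  // (optional) -> NominalToBinary -> Remove (class attribute and attributes
  // with at most one distinct value), after which the correlation matrix is
  // eigen-decomposed to obtain the principal components.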

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo        an Instances object containing the input
   *                            instance structure (any instances contained
   *                            in the object are ignored - only the structure
   *                            is required).
   * @return                    true if the outputFormat may be collected
   *                            immediately
   * @throws Exception          if the input format can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);

    m_Eigenvalues           = null;
    m_OutputNumAtts         = -1;
    m_AttributeFilter       = null;
    m_NominalToBinaryFilter = null;
    m_SumOfEigenValues      = 0.0;

    return false;
  }

  /**
   * Input an instance for filtering. Filter requires all
   * training instances be read before producing output.
   *
   * @param instance                    the input instance
   * @return                            true if the filtered instance may now be
   *                                    collected with output().
   * @throws IllegalStateException      if no input format has been set
   * @throws Exception                  if conversion fails
   */
  public boolean input(Instance instance) throws Exception {
    Instance    inst;

    if (getInputFormat() == null)
      throw new IllegalStateException("No input instance format defined");

    if (isNewBatch()) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isFirstBatchDone()) {
      inst = convertInstance(instance);
      inst.setDataset(getOutputFormat());
      push(inst);
      return true;
    }
    else {
      bufferInput(instance);
      return false;
    }
  }

  /**
   * Signify that this batch of input to the filter is finished.
   *
   * @return                            true if there are instances pending output
   * @throws NullPointerException       if no input structure has been defined
   * @throws Exception                  if there was a problem finishing the batch
   */
  public boolean batchFinished() throws Exception {
    int         i;
    Instances   insts;
    Instance    inst;

    if (getInputFormat() == null)
      throw new NullPointerException("No input instance format defined");

    insts = getInputFormat();

    if (!isFirstBatchDone())
      setup(insts);

    for (i = 0; i < insts.numInstances(); i++) {
      inst = convertInstance(insts.instance(i));
      inst.setDataset(getOutputFormat());
      push(inst);
    }

    flushInput();
    m_NewBatch       = true;
    m_FirstBatchDone = true;

    return (numPendingOutput() != 0);
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * Main method for running this filter.
   *
   * @param args        should contain arguments to the filter: use -h for help
   */
  public static void main(String[] args) {
    runFilter(new PrincipalComponents(), args);
  }
}