Context Navigation

PrincipalComponents.java

Last change on this file was 29, checked in by gnappo, 15 years ago
Taggata versione per la demo e aggiunto branch.
File size: 23.6 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* PrincipalComponents.java
19	* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20	*/
21
22	package weka.filters.unsupervised.attribute;
23
24	import weka.core.Attribute;
25	import weka.core.Capabilities;
26	import weka.core.FastVector;
27	import weka.core.Instance;
28	import weka.core.DenseInstance;
29	import weka.core.Instances;
30	import weka.core.Option;
31	import weka.core.OptionHandler;
32	import weka.core.RevisionUtils;
33	import weka.core.SparseInstance;
34	import weka.core.Utils;
35	import weka.core.Capabilities.Capability;
36	import weka.core.matrix.EigenvalueDecomposition;
37	import weka.core.matrix.Matrix;
38	import weka.filters.Filter;
39	import weka.filters.UnsupervisedFilter;
40
41	import java.util.Enumeration;
42	import java.util.Vector;
43
44	/**
45	<!-- globalinfo-start -->
46	* Performs a principal components analysis and transformation of the data.<br/>
47	* Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data -- default 0.95 (95%).<br/>
48	* Based on code of the attribute selection scheme 'PrincipalComponents' by Mark Hall and Gabi Schmidberger.
49	* <p/>
50	<!-- globalinfo-end -->
51	*
52	<!-- options-start -->
53	* Valid options are: <p/>
54	*
55	* <pre> -D
56	* Don't normalize input data.</pre>
57	*
58	* <pre> -R <num>
59	* Retain enough PC attributes to account
60	* for this proportion of variance in the original data.
61	* (default: 0.95)</pre>
62	*
63	* <pre> -A <num>
64	* Maximum number of attributes to include in
65	* transformed attribute names.
66	* (-1 = include all, default: 5)</pre>
67	*
68	* <pre> -M <num>
69	* Maximum number of PC attributes to retain.
70	* (-1 = include all, default: -1)</pre>
71	*
72	<!-- options-end -->
73	*
74	* @author Mark Hall (mhall@cs.waikato.ac.nz) -- attribute selection code
75	* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) -- attribute selection code
76	* @author fracpete (fracpete at waikato dot ac dot nz) -- filter code
77	* @version $Revision: 5987 $
78	*/
79	public class PrincipalComponents
80	extends Filter
81	implements OptionHandler, UnsupervisedFilter {
82
83	/** for serialization. */
84	private static final long serialVersionUID = 4626939780964387784L;
85
86	/** The data to transform analyse/transform. */
87	protected Instances m_TrainInstances;
88
89	/** Keep a copy for the class attribute (if set). */
90	protected Instances m_TrainCopy;
91
92	/** The header for the transformed data format. */
93	protected Instances m_TransformedFormat;
94
95	/** Data has a class set. */
96	protected boolean m_HasClass;
97
98	/** Class index. */
99	protected int m_ClassIndex;
100
101	/** Number of attributes. */
102	protected int m_NumAttribs;
103
104	/** Number of instances. */
105	protected int m_NumInstances;
106
107	/** Correlation matrix for the original data. */
108	protected double[][] m_Correlation;
109
110	/** Will hold the unordered linear transformations of the (normalized)
111	original data. */
112	protected double[][] m_Eigenvectors;
113
114	/** Eigenvalues for the corresponding eigenvectors. */
115	protected double[] m_Eigenvalues = null;
116
117	/** Sorted eigenvalues. */
118	protected int[] m_SortedEigens;
119
120	/** sum of the eigenvalues. */
121	protected double m_SumOfEigenValues = 0.0;
122
123	/** Filters for replacing missing values. */
124	protected ReplaceMissingValues m_ReplaceMissingFilter;
125
126	/** Filter for normalizing the data. */
127	protected Normalize m_NormalizeFilter;
128
129	/** Filter for turning nominal values into numeric ones. */
130	protected NominalToBinary m_NominalToBinaryFilter;
131
132	/** Filter for removing class attribute, nominal attributes with 0 or 1 value. */
133	protected Remove m_AttributeFilter;
134
135	/** The number of attributes in the pc transformed data. */
136	protected int m_OutputNumAtts = -1;
137
138	/** normalize the input data? */
139	protected boolean m_Normalize = true;
140
141	/** the amount of varaince to cover in the original data when
142	retaining the best n PC's. */
143	protected double m_CoverVariance = 0.95;
144
145	/** maximum number of attributes in the transformed attribute name. */
146	protected int m_MaxAttrsInName = 5;
147
148	/** maximum number of attributes in the transformed data (-1 for all). */
149	protected int m_MaxAttributes = -1;
150
151	/**
152	* Returns a string describing this filter.
153	*
154	* @return a description of the filter suitable for
155	* displaying in the explorer/experimenter gui
156	*/
157	public String globalInfo() {
158	return
159	"Performs a principal components analysis and transformation of "
160	+ "the data.\n"
161	+ "Dimensionality reduction is accomplished by choosing enough eigenvectors "
162	+ "to account for some percentage of the variance in the original data -- "
163	+ "default 0.95 (95%).\n"
164	+ "Based on code of the attribute selection scheme 'PrincipalComponents' "
165	+ "by Mark Hall and Gabi Schmidberger.";
166	}
167
168	/**
169	* Returns an enumeration describing the available options.
170	*
171	* @return an enumeration of all the available options.
172	*/
173	public Enumeration listOptions() {
174	Vector result = new Vector();
175
176	result.addElement(new Option(
177	"\tDon't normalize input data.",
178	"D", 0, "-D"));
179
180	result.addElement(new Option(
181	"\tRetain enough PC attributes to account\n"
182	+"\tfor this proportion of variance in the original data.\n"
183	+ "\t(default: 0.95)",
184	"R", 1, "-R <num>"));
185
186	result.addElement(new Option(
187	"\tMaximum number of attributes to include in \n"
188	+ "\ttransformed attribute names.\n"
189	+ "\t(-1 = include all, default: 5)",
190	"A", 1, "-A <num>"));
191
192	result.addElement(new Option(
193	"\tMaximum number of PC attributes to retain.\n"
194	+ "\t(-1 = include all, default: -1)",
195	"M", 1, "-M <num>"));
196
197	return result.elements();
198	}
199
200	/**
201	* Parses a list of options for this object. <p/>
202	*
203	<!-- options-start -->
204	* Valid options are: <p/>
205	*
206	* <pre> -D
207	* Don't normalize input data.</pre>
208	*
209	* <pre> -R <num>
210	* Retain enough PC attributes to account
211	* for this proportion of variance in the original data.
212	* (default: 0.95)</pre>
213	*
214	* <pre> -A <num>
215	* Maximum number of attributes to include in
216	* transformed attribute names.
217	* (-1 = include all, default: 5)</pre>
218	*
219	* <pre> -M <num>
220	* Maximum number of PC attributes to retain.
221	* (-1 = include all, default: -1)</pre>
222	*
223	<!-- options-end -->
224	*
225	* @param options the list of options as an array of strings
226	* @throws Exception if an option is not supported
227	*/
228	public void setOptions(String[] options) throws Exception {
229	String tmpStr;
230
231	tmpStr = Utils.getOption('R', options);
232	if (tmpStr.length() != 0)
233	setVarianceCovered(Double.parseDouble(tmpStr));
234	else
235	setVarianceCovered(0.95);
236
237	tmpStr = Utils.getOption('A', options);
238	if (tmpStr.length() != 0)
239	setMaximumAttributeNames(Integer.parseInt(tmpStr));
240	else
241	setMaximumAttributeNames(5);
242
243	tmpStr = Utils.getOption('M', options);
244	if (tmpStr.length() != 0)
245	setMaximumAttributes(Integer.parseInt(tmpStr));
246	else
247	setMaximumAttributes(-1);
248
249	setNormalize(!Utils.getFlag('D', options));
250	}
251
252	/**
253	* Gets the current settings of the filter.
254	*
255	* @return an array of strings suitable for passing to setOptions
256	*/
257	public String[] getOptions() {
258	Vector<String> result;
259
260	result = new Vector<String>();
261
262	result.add("-R");
263	result.add("" + getVarianceCovered());
264
265	result.add("-A");
266	result.add("" + getMaximumAttributeNames());
267
268	result.add("-M");
269	result.add("" + getMaximumAttributes());
270
271	if (!getNormalize())
272	result.add("-D");
273
274	return result.toArray(new String[result.size()]);
275	}
276
277	/**
278	* Returns the tip text for this property.
279	*
280	* @return tip text for this property suitable for
281	* displaying in the explorer/experimenter gui
282	*/
283	public String normalizeTipText() {
284	return "Normalize input data.";
285	}
286
287	/**
288	* Set whether input data will be normalized.
289	*
290	* @param value true if input data is to be normalized
291	*/
292	public void setNormalize(boolean value) {
293	m_Normalize = value;
294	}
295
296	/**
297	* Gets whether or not input data is to be normalized.
298	*
299	* @return true if input data is to be normalized
300	*/
301	public boolean getNormalize() {
302	return m_Normalize;
303	}
304
305	/**
306	* Returns the tip text for this property.
307	*
308	* @return tip text for this property suitable for
309	* displaying in the explorer/experimenter gui
310	*/
311	public String varianceCoveredTipText() {
312	return "Retain enough PC attributes to account for this proportion of variance.";
313	}
314
315	/**
316	* Sets the amount of variance to account for when retaining
317	* principal components.
318	*
319	* @param value the proportion of total variance to account for
320	*/
321	public void setVarianceCovered(double value) {
322	m_CoverVariance = value;
323	}
324
325	/**
326	* Gets the proportion of total variance to account for when
327	* retaining principal components.
328	*
329	* @return the proportion of variance to account for
330	*/
331	public double getVarianceCovered() {
332	return m_CoverVariance;
333	}
334
335	/**
336	* Returns the tip text for this property.
337	*
338	* @return tip text for this property suitable for
339	* displaying in the explorer/experimenter gui
340	*/
341	public String maximumAttributeNamesTipText() {
342	return "The maximum number of attributes to include in transformed attribute names.";
343	}
344
345	/**
346	* Sets maximum number of attributes to include in
347	* transformed attribute names.
348	*
349	* @param value the maximum number of attributes
350	*/
351	public void setMaximumAttributeNames(int value) {
352	m_MaxAttrsInName = value;
353	}
354
355	/**
356	* Gets maximum number of attributes to include in
357	* transformed attribute names.
358	*
359	* @return the maximum number of attributes
360	*/
361	public int getMaximumAttributeNames() {
362	return m_MaxAttrsInName;
363	}
364
365	/**
366	* Returns the tip text for this property.
367	*
368	* @return tip text for this property suitable for
369	* displaying in the explorer/experimenter gui
370	*/
371	public String maximumAttributesTipText() {
372	return "The maximum number of PC attributes to retain.";
373	}
374
375	/**
376	* Sets maximum number of PC attributes to retain.
377	*
378	* @param value the maximum number of attributes
379	*/
380	public void setMaximumAttributes(int value) {
381	m_MaxAttributes = value;
382	}
383
384	/**
385	* Gets maximum number of PC attributes to retain.
386	*
387	* @return the maximum number of attributes
388	*/
389	public int getMaximumAttributes() {
390	return m_MaxAttributes;
391	}
392
393	/**
394	* Returns the capabilities of this evaluator.
395	*
396	* @return the capabilities of this evaluator
397	* @see Capabilities
398	*/
399	public Capabilities getCapabilities() {
400	Capabilities result = super.getCapabilities();
401	result.disableAll();
402
403	// attributes
404	result.enable(Capability.NOMINAL_ATTRIBUTES);
405	result.enable(Capability.NUMERIC_ATTRIBUTES);
406	result.enable(Capability.DATE_ATTRIBUTES);
407	result.enable(Capability.MISSING_VALUES);
408
409	// class
410	result.enable(Capability.NOMINAL_CLASS);
411	result.enable(Capability.NUMERIC_CLASS);
412	result.enable(Capability.DATE_CLASS);
413	result.enable(Capability.MISSING_CLASS_VALUES);
414	result.enable(Capability.NO_CLASS);
415
416	return result;
417	}
418
419	/**
420	* Determines the output format based on the input format and returns
421	* this. In case the output format cannot be returned immediately, i.e.,
422	* immediateOutputFormat() returns false, then this method will be called
423	* from batchFinished().
424	*
425	* @param inputFormat the input format to base the output format on
426	* @return the output format
427	* @throws Exception in case the determination goes wrong
428	* @see #hasImmediateOutputFormat()
429	* @see #batchFinished()
430	*/
431	protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
432	double cumulative;
433	FastVector attributes;
434	int i;
435	int j;
436	StringBuffer attName;
437	double[] coeff_mags;
438	int num_attrs;
439	int[] coeff_inds;
440	double coeff_value;
441	int numAttsLowerBound;
442
443	if (m_Eigenvalues == null)
444	return inputFormat;
445
446	if (m_MaxAttributes > 0)
447	numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
448	else
449	numAttsLowerBound = 0;
450	if (numAttsLowerBound < 0)
451	numAttsLowerBound = 0;
452
453	cumulative = 0.0;
454	attributes = new FastVector();
455	for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
456	attName = new StringBuffer();
457	// build array of coefficients
458	coeff_mags = new double[m_NumAttribs];
459	for (j = 0; j < m_NumAttribs; j++)
460	coeff_mags[j] = -Math.abs(m_Eigenvectors[j][m_SortedEigens[i]]);
461	num_attrs = (m_MaxAttrsInName > 0) ? Math.min(m_NumAttribs, m_MaxAttrsInName) : m_NumAttribs;
462
463	// this array contains the sorted indices of the coefficients
464	if (m_NumAttribs > 0) {
465	// if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
466	coeff_inds = Utils.sort(coeff_mags);
467	}
468	else {
469	// if m_maxAttrsInName <= 0, use all coeffs in original order
470	coeff_inds = new int[m_NumAttribs];
471	for (j = 0; j < m_NumAttribs; j++)
472	coeff_inds[j] = j;
473	}
474	// build final attName string
475	for (j = 0; j < num_attrs; j++) {
476	coeff_value = m_Eigenvectors[coeff_inds[j]][m_SortedEigens[i]];
477	if (j > 0 && coeff_value >= 0)
478	attName.append("+");
479	attName.append(
480	Utils.doubleToString(coeff_value,5,3)
481	+ inputFormat.attribute(coeff_inds[j]).name());
482	}
483	if (num_attrs < m_NumAttribs)
484	attName.append("...");
485
486	attributes.addElement(new Attribute(attName.toString()));
487	cumulative += m_Eigenvalues[m_SortedEigens[i]];
488
489	if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
490	break;
491	}
492
493	if (m_HasClass)
494	attributes.addElement(m_TrainCopy.classAttribute().copy());
495
496	Instances outputFormat =
497	new Instances(
498	m_TrainCopy.relationName() + "_principal components", attributes, 0);
499
500	// set the class to be the last attribute if necessary
501	if (m_HasClass)
502	outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
503
504	m_OutputNumAtts = outputFormat.numAttributes();
505
506	return outputFormat;
507	}
508
509	/**
510	* Fill the correlation matrix.
511	*/
512	protected void fillCorrelation() {
513	int i;
514	int j;
515	int k;
516	double[] att1;
517	double[] att2;
518	double corr;
519
520	m_Correlation = new double[m_NumAttribs][m_NumAttribs];
521	att1 = new double [m_NumInstances];
522	att2 = new double [m_NumInstances];
523
524	for (i = 0; i < m_NumAttribs; i++) {
525	for (j = 0; j < m_NumAttribs; j++) {
526	if (i == j) {
527	m_Correlation[i][j] = 1.0;
528	}
529	else {
530	for (k = 0; k < m_NumInstances; k++) {
531	att1[k] = m_TrainInstances.instance(k).value(i);
532	att2[k] = m_TrainInstances.instance(k).value(j);
533	}
534	corr = Utils.correlation(att1,att2,m_NumInstances);
535	m_Correlation[i][j] = corr;
536	m_Correlation[j][i] = corr;
537	}
538	}
539	}
540	}
541
542	/**
543	* Transform an instance in original (unormalized) format.
544	*
545	* @param instance an instance in the original (unormalized) format
546	* @return a transformed instance
547	* @throws Exception if instance can't be transformed
548	*/
549	protected Instance convertInstance(Instance instance) throws Exception {
550	Instance result;
551	double[] newVals;
552	Instance tempInst;
553	double cumulative;
554	int i;
555	int j;
556	double tempval;
557	int numAttsLowerBound;
558
559	newVals = new double[m_OutputNumAtts];
560	tempInst = (Instance) instance.copy();
561
562	m_ReplaceMissingFilter.input(tempInst);
563	m_ReplaceMissingFilter.batchFinished();
564	tempInst = m_ReplaceMissingFilter.output();
565
566	if (m_Normalize) {
567	m_NormalizeFilter.input(tempInst);
568	m_NormalizeFilter.batchFinished();
569	tempInst = m_NormalizeFilter.output();
570	}
571
572	m_NominalToBinaryFilter.input(tempInst);
573	m_NominalToBinaryFilter.batchFinished();
574	tempInst = m_NominalToBinaryFilter.output();
575
576	if (m_AttributeFilter != null) {
577	m_AttributeFilter.input(tempInst);
578	m_AttributeFilter.batchFinished();
579	tempInst = m_AttributeFilter.output();
580	}
581
582	if (m_HasClass)
583	newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
584
585	if (m_MaxAttributes > 0)
586	numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
587	else
588	numAttsLowerBound = 0;
589	if (numAttsLowerBound < 0)
590	numAttsLowerBound = 0;
591
592	cumulative = 0;
593	for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
594	tempval = 0.0;
595	for (j = 0; j < m_NumAttribs; j++)
596	tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);
597
598	newVals[m_NumAttribs - i - 1] = tempval;
599	cumulative += m_Eigenvalues[m_SortedEigens[i]];
600	if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
601	break;
602	}
603
604	// create instance
605	if (instance instanceof SparseInstance)
606	result = new SparseInstance(instance.weight(), newVals);
607	else
608	result = new DenseInstance(instance.weight(), newVals);
609
610	return result;
611	}
612
613	/**
614	* Initializes the filter with the given input data.
615	*
616	* @param instances the data to process
617	* @throws Exception in case the processing goes wrong
618	* @see #batchFinished()
619	*/
620	protected void setup(Instances instances) throws Exception {
621	int i;
622	int j;
623	Vector<Integer> deleteCols;
624	int[] todelete;
625	double[][] v;
626	Matrix corr;
627	EigenvalueDecomposition eig;
628	Matrix V;
629
630	m_TrainInstances = new Instances(instances);
631
632	// make a copy of the training data so that we can get the class
633	// column to append to the transformed data (if necessary)
634	m_TrainCopy = new Instances(m_TrainInstances, 0);
635
636	m_ReplaceMissingFilter = new ReplaceMissingValues();
637	m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
638	m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);
639
640	if (m_Normalize) {
641	m_NormalizeFilter = new Normalize();
642	m_NormalizeFilter.setInputFormat(m_TrainInstances);
643	m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NormalizeFilter);
644	}
645
646	m_NominalToBinaryFilter = new NominalToBinary();
647	m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
648	m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);
649
650	// delete any attributes with only one distinct value or are all missing
651	deleteCols = new Vector<Integer>();
652	for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
653	if (m_TrainInstances.numDistinctValues(i) <= 1)
654	deleteCols.addElement(i);
655	}
656
657	if (m_TrainInstances.classIndex() >=0) {
658	// get rid of the class column
659	m_HasClass = true;
660	m_ClassIndex = m_TrainInstances.classIndex();
661	deleteCols.addElement(new Integer(m_ClassIndex));
662	}
663
664	// remove columns from the data if necessary
665	if (deleteCols.size() > 0) {
666	m_AttributeFilter = new Remove();
667	todelete = new int [deleteCols.size()];
668	for (i = 0; i < deleteCols.size(); i++)
669	todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();
670	m_AttributeFilter.setAttributeIndicesArray(todelete);
671	m_AttributeFilter.setInvertSelection(false);
672	m_AttributeFilter.setInputFormat(m_TrainInstances);
673	m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
674	}
675
676	// can evaluator handle the processed data ? e.g., enough attributes?
677	getCapabilities().testWithFail(m_TrainInstances);
678
679	m_NumInstances = m_TrainInstances.numInstances();
680	m_NumAttribs = m_TrainInstances.numAttributes();
681
682	fillCorrelation();
683
684	// get eigen vectors/values
685	corr = new Matrix(m_Correlation);
686	eig = corr.eig();
687	V = eig.getV();
688	v = new double[m_NumAttribs][m_NumAttribs];
689	for (i = 0; i < v.length; i++) {
690	for (j = 0; j < v[0].length; j++)
691	v[i][j] = V.get(i, j);
692	}
693	m_Eigenvectors = (double[][]) v.clone();
694	m_Eigenvalues = (double[]) eig.getRealEigenvalues().clone();
695
696	// any eigenvalues less than 0 are not worth anything --- change to 0
697	for (i = 0; i < m_Eigenvalues.length; i++) {
698	if (m_Eigenvalues[i] < 0)
699	m_Eigenvalues[i] = 0.0;
700	}
701	m_SortedEigens = Utils.sort(m_Eigenvalues);
702	m_SumOfEigenValues = Utils.sum(m_Eigenvalues);
703
704	m_TransformedFormat = determineOutputFormat(m_TrainInstances);
705	setOutputFormat(m_TransformedFormat);
706
707	m_TrainInstances = null;
708	}
709
710	/**
711	* Sets the format of the input instances.
712	*
713	* @param instanceInfo an Instances object containing the input
714	* instance structure (any instances contained
715	* in the object are ignored - only the structure
716	* is required).
717	* @return true if the outputFormat may be collected
718	* immediately
719	* @throws Exception if the input format can't be set successfully
720	*/
721	public boolean setInputFormat(Instances instanceInfo) throws Exception {
722	super.setInputFormat(instanceInfo);
723
724	m_Eigenvalues = null;
725	m_OutputNumAtts = -1;
726	m_AttributeFilter = null;
727	m_NominalToBinaryFilter = null;
728	m_SumOfEigenValues = 0.0;
729
730	return false;
731	}
732
733	/**
734	* Input an instance for filtering. Filter requires all
735	* training instances be read before producing output.
736	*
737	* @param instance the input instance
738	* @return true if the filtered instance may now be
739	* collected with output().
740	* @throws IllegalStateException if no input format has been set
741	* @throws Exception if conversion fails
742	*/
743	public boolean input(Instance instance) throws Exception {
744	Instance inst;
745
746	if (getInputFormat() == null)
747	throw new IllegalStateException("No input instance format defined");
748
749	if (isNewBatch()) {
750	resetQueue();
751	m_NewBatch = false;
752	}
753
754	if (isFirstBatchDone()) {
755	inst = convertInstance(instance);
756	inst.setDataset(getOutputFormat());
757	push(inst);
758	return true;
759	}
760	else {
761	bufferInput(instance);
762	return false;
763	}
764	}
765
766	/**
767	* Signify that this batch of input to the filter is finished.
768	*
769	* @return true if there are instances pending output
770	* @throws NullPointerException if no input structure has been defined,
771	* @throws Exception if there was a problem finishing the batch.
772	*/
773	public boolean batchFinished() throws Exception {
774	int i;
775	Instances insts;
776	Instance inst;
777
778	if (getInputFormat() == null)
779	throw new NullPointerException("No input instance format defined");
780
781	insts = getInputFormat();
782
783	if (!isFirstBatchDone())
784	setup(insts);
785
786	for (i = 0; i < insts.numInstances(); i++) {
787	inst = convertInstance(insts.instance(i));
788	inst.setDataset(getOutputFormat());
789	push(inst);
790	}
791
792	flushInput();
793	m_NewBatch = true;
794	m_FirstBatchDone = true;
795
796	return (numPendingOutput() != 0);
797	}
798
799	/**
800	* Returns the revision string.
801	*
802	* @return the revision
803	*/
804	public String getRevision() {
805	return RevisionUtils.extract("$Revision: 5987 $");
806	}
807
808	/**
809	* Main method for running this filter.
810	*
811	* @param args should contain arguments to the filter: use -h for help
812	*/
813	public static void main(String[] args) {
814	runFilter(new PrincipalComponents(), args);
815	}
816	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: tags/MetisMQIDemo/src/main/java/weka/filters/unsupervised/attribute/PrincipalComponents.java

Download in other formats: