Context Navigation

source: src/main/java/weka/attributeSelection/SubsetSizeForwardSelection.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 16 years ago
Import di weka.
File size: 27.6 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* SubsetSizeForwardSelection.java
19	* Copyright (C) 2007 Martin Guetlein
20	*
21	*/
22	package weka.attributeSelection;
23
24	import weka.core.Instances;
25	import weka.core.Option;
26	import weka.core.OptionHandler;
27	import weka.core.RevisionUtils;
28	import weka.core.SelectedTag;
29	import weka.core.Tag;
30	import weka.core.TechnicalInformation;
31	import weka.core.Utils;
32	import weka.core.TechnicalInformation.Field;
33	import weka.core.TechnicalInformation.Type;
34
35	import java.util.BitSet;
36	import java.util.Enumeration;
37	import java.util.Random;
38	import java.util.Vector;
39
40
41	/**
42	<!-- globalinfo-start -->
43	* SubsetSizeForwardSelection:<br/>
44	* <br/>
45	* Extension of LinearForwardSelection. The search performs an interior cross-validation (seed and number of folds can be specified). A LinearForwardSelection is performed on each foldto determine the optimal subset-size (using the given SubsetSizeEvaluator). Finally, a LinearForwardSelection up to the optimal subset-size is performed on the whole data.<br/>
46	* <br/>
47	* For more information see:<br/>
48	* <br/>
49	* Martin Guetlein (2006). Large Scale Attribute Selection Using Wrappers. Freiburg, Germany.
50	* <p/>
51	<!-- globalinfo-end -->
52	*
53	<!-- options-start -->
54	* Valid options are: <p/>
55	*
56	* <pre> -I
57	* Perform initial ranking to select the
58	* top-ranked attributes.</pre>
59	*
60	* <pre> -K <num>
61	* Number of top-ranked attributes that are
62	* taken into account by the search.</pre>
63	*
64	* <pre> -T <0 = fixed-set \| 1 = fixed-width>
65	* Type of Linear Forward Selection (default = 0).</pre>
66	*
67	* <pre> -S <num>
68	* Size of lookup cache for evaluated subsets.
69	* Expressed as a multiple of the number of
70	* attributes in the data set. (default = 1)</pre>
71	*
72	* <pre> -E <subset evaluator>
73	* Subset-evaluator used for subset-size determination.-- -M</pre>
74	*
75	* <pre> -F <num>
76	* Number of cross validation folds
77	* for subset size determination (default = 5).</pre>
78	*
79	* <pre> -R <num>
80	* Seed for cross validation
81	* subset size determination. (default = 1)</pre>
82	*
83	* <pre> -Z
84	* verbose on/off</pre>
85	*
86	* <pre>
87	* Options specific to evaluator weka.attributeSelection.ClassifierSubsetEval:
88	* </pre>
89	*
90	* <pre> -B <classifier>
91	* class name of the classifier to use for accuracy estimation.
92	* Place any classifier options LAST on the command line
93	* following a "--". eg.:
94	* -B weka.classifiers.bayes.NaiveBayes ... -- -K
95	* (default: weka.classifiers.rules.ZeroR)</pre>
96	*
97	* <pre> -T
98	* Use the training data to estimate accuracy.</pre>
99	*
100	* <pre> -H <filename>
101	* Name of the hold out/test set to
102	* estimate accuracy on.</pre>
103	*
104	* <pre>
105	* Options specific to scheme weka.classifiers.rules.ZeroR:
106	* </pre>
107	*
108	* <pre> -D
109	* If set, classifier is run in debug mode and
110	* may output additional info to the console</pre>
111	*
112	<!-- options-end -->
113	*
114	* @author Martin Guetlein (martin.guetlein@gmail.com)
115	* @version $Revision: 5604 $
116	*/
117	public class SubsetSizeForwardSelection extends ASSearch
118	implements OptionHandler {
119	/** search directions */
120	protected static final int TYPE_FIXED_SET = 0;
121	protected static final int TYPE_FIXED_WIDTH = 1;
122	public static final Tag[] TAGS_TYPE = {
123	new Tag(TYPE_FIXED_SET, "Fixed-set"),
124	new Tag(TYPE_FIXED_WIDTH, "Fixed-width"),
125	};
126
127	// member variables
128	/** perform initial ranking to select top-ranked attributes */
129	protected boolean m_performRanking;
130
131	/**
132	* number of top-ranked attributes that are taken into account for the
133	* search
134	*/
135	protected int m_numUsedAttributes;
136
137	/** 0 == fixed-set, 1 == fixed-width */
138	protected int m_linearSelectionType;
139
140	/** the subset evaluator to use for subset size determination */
141	private ASEvaluation m_setSizeEval;
142
143	/**
144	* Number of cross validation folds for subset size determination (default =
145	* 5).
146	*/
147	protected int m_numFolds;
148
149	/** Seed for cross validation subset size determination. (default = 1) */
150	protected int m_seed;
151
152	/** number of attributes in the data */
153	protected int m_numAttribs;
154
155	/** total number of subsets evaluated during a search */
156	protected int m_totalEvals;
157
158	/** for debugging */
159	protected boolean m_verbose;
160
161	/** holds the merit of the best subset found */
162	protected double m_bestMerit;
163
164	/** holds the maximum size of the lookup cache for evaluated subsets */
165	protected int m_cacheSize;
166
167	/**
168	* Constructor
169	*/
170	public SubsetSizeForwardSelection() {
171	resetOptions();
172	}
173
174	/**
175	* Returns a string describing this search method
176	*
177	* @return a description of the search method suitable for displaying in the
178	* explorer/experimenter gui
179	*/
180	public String globalInfo() {
181	return "SubsetSizeForwardSelection:\n\n" +
182	"Extension of LinearForwardSelection. The search performs an interior " +
183	"cross-validation (seed and number of folds can be specified). A " +
184	"LinearForwardSelection is performed on each foldto determine the optimal " +
185	"subset-size (using the given SubsetSizeEvaluator). Finally, a " +
186	"LinearForwardSelection up to the optimal subset-size is performed on " +
187	"the whole data.\n\n"
188	+ "For more information see:\n\n"
189	+ getTechnicalInformation().toString();
190	}
191
192	/**
193	* Returns an instance of a TechnicalInformation object, containing
194	* detailed information about the technical background of this class,
195	* e.g., paper reference or book this class is based on.
196	*
197	* @return the technical information about this class
198	*/
199	public TechnicalInformation getTechnicalInformation() {
200	TechnicalInformation result;
201	TechnicalInformation additional;
202
203	result = new TechnicalInformation(Type.INPROCEEDINGS);
204	result.setValue(Field.AUTHOR, "Martin Guetlein and Eibe Frank and Mark Hall");
205	result.setValue(Field.YEAR, "2009");
206	result.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers");
207	result.setValue(Field.BOOKTITLE, "Proc IEEE Symposium on Computational Intelligence and Data Mining");
208	result.setValue(Field.PAGES, "332-339");
209	result.setValue(Field.PUBLISHER, "IEEE");
210
211	additional = result.add(Type.MASTERSTHESIS);
212	additional.setValue(Field.AUTHOR, "Martin Guetlein");
213	additional.setValue(Field.YEAR, "2006");
214	additional.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers");
215	additional.setValue(Field.SCHOOL, "Albert-Ludwigs-Universitaet");
216	additional.setValue(Field.ADDRESS, "Freiburg, Germany");
217
218	return result;
219	}
220
221	/**
222	* Returns an enumeration describing the available options.
223	*
224	* @return an enumeration of all the available options.
225	*
226	*/
227	public Enumeration listOptions() {
228	Vector newVector = new Vector(9);
229
230	newVector.addElement(new Option("\tPerform initial ranking to select the" +
231	"\n\ttop-ranked attributes.", "I", 0, "-I"));
232	newVector.addElement(new Option(
233	"\tNumber of top-ranked attributes that are " +
234	"\n\ttaken into account by the search.", "K", 1, "-K <num>"));
235	newVector.addElement(new Option(
236	"\tType of Linear Forward Selection (default = 0).", "T", 1,
237	"-T <0 = fixed-set \| 1 = fixed-width>"));
238	newVector.addElement(new Option(
239	"\tSize of lookup cache for evaluated subsets." +
240	"\n\tExpressed as a multiple of the number of" +
241	"\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>"));
242	newVector.addElement(new Option(
243	"\tSubset-evaluator used for subset-size determination." + "-- -M",
244	"E", 1, "-E <subset evaluator>"));
245	newVector.addElement(new Option("\tNumber of cross validation folds" +
246	"\n\tfor subset size determination (default = 5).", "F", 1, "-F <num>"));
247	newVector.addElement(new Option("\tSeed for cross validation" +
248	"\n\tsubset size determination. (default = 1)", "R", 1, "-R <num>"));
249	newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z"));
250
251	if ((m_setSizeEval != null) && (m_setSizeEval instanceof OptionHandler)) {
252	newVector.addElement(new Option("", "", 0,
253	"\nOptions specific to " + "evaluator " +
254	m_setSizeEval.getClass().getName() + ":"));
255
256	Enumeration enu = ((OptionHandler) m_setSizeEval).listOptions();
257
258	while (enu.hasMoreElements()) {
259	newVector.addElement(enu.nextElement());
260	}
261	}
262
263	return newVector.elements();
264	}
265
266	/**
267	* Parses a given list of options.
268	*
269	* Valid options are:
270	* <p>
271	*
272	* -I <br>
273	* Perform initial ranking to select top-ranked attributes.
274	* <p>
275	*
276	* -K <num> <br>
277	* Number of top-ranked attributes that are taken into account.
278	* <p>
279	*
280	* -T <0 = fixed-set \| 1 = fixed-width> <br>
281	* Typ of Linear Forward Selection (default = 0).
282	* <p>
283	*
284	* -S <num> <br>
285	* Size of lookup cache for evaluated subsets. Expressed as a multiple of
286	* the number of attributes in the data set. (default = 1).
287	* <p>
288	*
289	* -E <string> <br>
290	* class name of subset evaluator to use for subset size determination
291	* (default = null, same subset evaluator as for ranking and final forward
292	* selection is used). Place any evaluator options LAST on the command line
293	* following a "--". eg. -A weka.attributeSelection.ClassifierSubsetEval ... --
294	* -M
295	*
296	* </pre>
297	*
298	* -F <num> <br>
299	* Number of cross validation folds for subset size determination (default =
300	* 5).
301	* <p>
302	*
303	* -R <num> <br>
304	* Seed for cross validation subset size determination. (default = 1)
305	* <p>
306	*
307	* -Z <br>
308	* verbose on/off.
309	* <p>
310	*
311	* @param options
312	* the list of options as an array of strings
313	* @exception Exception
314	* if an option is not supported
315	*
316	*/
317	public void setOptions(String[] options) throws Exception {
318	String optionString;
319	resetOptions();
320
321	setPerformRanking(Utils.getFlag('I', options));
322
323	optionString = Utils.getOption('K', options);
324
325	if (optionString.length() != 0) {
326	setNumUsedAttributes(Integer.parseInt(optionString));
327	}
328
329	optionString = Utils.getOption('T', options);
330
331	if (optionString.length() != 0) {
332	setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE));
333	} else {
334	setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE));
335	}
336
337	optionString = Utils.getOption('S', options);
338
339	if (optionString.length() != 0) {
340	setLookupCacheSize(Integer.parseInt(optionString));
341	}
342
343	optionString = Utils.getOption('E', options);
344
345	if (optionString.length() == 0) {
346	System.out.println(
347	"No subset size evaluator given, using evaluator that is used for final search.");
348	m_setSizeEval = null;
349	} else {
350	setSubsetSizeEvaluator(ASEvaluation.forName(optionString,
351	Utils.partitionOptions(options)));
352	}
353
354	optionString = Utils.getOption('F', options);
355
356	if (optionString.length() != 0) {
357	setNumSubsetSizeCVFolds(Integer.parseInt(optionString));
358	}
359
360	optionString = Utils.getOption('R', options);
361
362	if (optionString.length() != 0) {
363	setSeed(Integer.parseInt(optionString));
364	}
365
366	m_verbose = Utils.getFlag('Z', options);
367	}
368
369	/**
370	* Set the maximum size of the evaluated subset cache (hashtable). This is
371	* expressed as a multiplier for the number of attributes in the data set.
372	* (default = 1).
373	*
374	* @param size
375	* the maximum size of the hashtable
376	*/
377	public void setLookupCacheSize(int size) {
378	if (size >= 0) {
379	m_cacheSize = size;
380	}
381	}
382
383	/**
384	* Return the maximum size of the evaluated subset cache (expressed as a
385	* multiplier for the number of attributes in a data set.
386	*
387	* @return the maximum size of the hashtable.
388	*/
389	public int getLookupCacheSize() {
390	return m_cacheSize;
391	}
392
393	/**
394	* Returns the tip text for this property
395	*
396	* @return tip text for this property suitable for displaying in the
397	* explorer/experimenter gui
398	*/
399	public String lookupCacheSizeTipText() {
400	return "Set the maximum size of the lookup cache of evaluated subsets. This is " +
401	"expressed as a multiplier of the number of attributes in the data set. " +
402	"(default = 1).";
403	}
404
405	/**
406	* Returns the tip text for this property
407	*
408	* @return tip text for this property suitable for displaying in the
409	* explorer/experimenter gui
410	*/
411	public String performRankingTipText() {
412	return "Perform initial ranking to select top-ranked attributes.";
413	}
414
415	/**
416	* Perform initial ranking to select top-ranked attributes.
417	*
418	* @param b
419	* true if initial ranking should be performed
420	*/
421	public void setPerformRanking(boolean b) {
422	m_performRanking = b;
423	}
424
425	/**
426	* Get boolean if initial ranking should be performed to select the
427	* top-ranked attributes
428	*
429	* @return true if initial ranking should be performed
430	*/
431	public boolean getPerformRanking() {
432	return m_performRanking;
433	}
434
435	/**
436	* Returns the tip text for this property
437	*
438	* @return tip text for this property suitable for displaying in the
439	* explorer/experimenter gui
440	*/
441	public String numUsedAttributesTipText() {
442	return "Set the amount of top-ranked attributes that are taken into account by the search process.";
443	}
444
445	/**
446	* Set the number of top-ranked attributes that taken into account by the
447	* search process.
448	*
449	* @param k
450	* the number of attributes
451	* @exception Exception
452	* if k is less than 2
453	*/
454	public void setNumUsedAttributes(int k) throws Exception {
455	if (k < 2) {
456	throw new Exception("Value of -K must be >= 2.");
457	}
458
459	m_numUsedAttributes = k;
460	}
461
462	/**
463	* Get the number of top-ranked attributes that taken into account by the
464	* search process.
465	*
466	* @return the number of top-ranked attributes that taken into account
467	*/
468	public int getNumUsedAttributes() {
469	return m_numUsedAttributes;
470	}
471
472	/**
473	* Returns the tip text for this property
474	*
475	* @return tip text for this property suitable for displaying in the
476	* explorer/experimenter gui
477	*/
478	public String typeTipText() {
479	return "Set the type of the search.";
480	}
481
482	/**
483	* Set the type
484	*
485	* @param t
486	* the Linear Forward Selection type
487	*/
488	public void setType(SelectedTag t) {
489	if (t.getTags() == TAGS_TYPE) {
490	m_linearSelectionType = t.getSelectedTag().getID();
491	}
492	}
493
494	/**
495	* Get the type
496	*
497	* @return the Linear Forward Selection type
498	*/
499	public SelectedTag getType() {
500	return new SelectedTag(m_linearSelectionType, TAGS_TYPE);
501	}
502
503	/**
504	* Returns the tip text for this property
505	*
506	* @return tip text for this property suitable for displaying in the
507	* explorer/experimenter gui
508	*/
509	public String subsetSizeEvaluatorTipText() {
510	return "Subset evaluator to use for subset size determination.";
511	}
512
513	/**
514	* Set the subset evaluator to use for subset size determination.
515	*
516	* @param eval
517	* the subset evaluator to use for subset size determination.
518	*/
519	public void setSubsetSizeEvaluator(ASEvaluation eval)
520	throws Exception {
521	if (!(eval instanceof SubsetEvaluator)) {
522	throw new Exception(eval.getClass().getName() +
523	" is no subset evaluator.");
524	}
525
526	m_setSizeEval = eval;
527	}
528
529	/**
530	* Get the subset evaluator used for subset size determination.
531	*
532	* @return the evaluator used for subset size determination.
533	*/
534	public ASEvaluation getSubsetSizeEvaluator() {
535	return m_setSizeEval;
536	}
537
538	/**
539	* Returns the tip text for this property
540	*
541	* @return tip text for this property suitable for displaying in the
542	* explorer/experimenter gui
543	*/
544	public String numSubsetSizeCVFoldsTipText() {
545	return "Number of cross validation folds for subset size determination";
546	}
547
548	/**
549	* Set the number of cross validation folds for subset size determination
550	* (default = 5).
551	*
552	* @param f
553	* number of folds
554	*/
555	public void setNumSubsetSizeCVFolds(int f) {
556	m_numFolds = f;
557	}
558
559	/**
560	* Get the number of cross validation folds for subset size determination
561	* (default = 5).
562	*
563	* @return number of folds
564	*/
565	public int getNumSubsetSizeCVFolds() {
566	return m_numFolds;
567	}
568
569	/**
570	* Returns the tip text for this property
571	*
572	* @return tip text for this property suitable for displaying in the
573	* explorer/experimenter gui
574	*/
575	public String seedTipText() {
576	return "Seed for cross validation subset size determination. (default = 1)";
577	}
578
579	/**
580	* Seed for cross validation subset size determination. (default = 1)
581	*
582	* @param s
583	* seed
584	*/
585	public void setSeed(int s) {
586	m_seed = s;
587	}
588
589	/**
590	* Seed for cross validation subset size determination. (default = 1)
591	*
592	* @return seed
593	*/
594	public int getSeed() {
595	return m_seed;
596	}
597
598	/**
599	* Returns the tip text for this property
600	*
601	* @return tip text for this property suitable for displaying in the
602	* explorer/experimenter gui
603	*/
604	public String verboseTipText() {
605	return "Turn on verbose output for monitoring the search's progress.";
606	}
607
608	/**
609	* Set whether verbose output should be generated.
610	*
611	* @param b
612	* true if output is to be verbose.
613	*/
614	public void setVerbose(boolean b) {
615	m_verbose = b;
616	}
617
618	/**
619	* Get whether output is to be verbose
620	*
621	* @return true if output will be verbose
622	*/
623	public boolean getVerbose() {
624	return m_verbose;
625	}
626
627	/**
628	* Gets the current settings of LinearForwardSelection.
629	*
630	* @return an array of strings suitable for passing to setOptions()
631	*/
632	public String[] getOptions() {
633	String[] evaluatorOptions = new String[0];
634
635	if ((m_setSizeEval != null) && (m_setSizeEval instanceof OptionHandler)) {
636	evaluatorOptions = ((OptionHandler) m_setSizeEval).getOptions();
637	}
638
639	String[] options = new String[15 + evaluatorOptions.length];
640	int current = 0;
641
642	if (m_performRanking) {
643	options[current++] = "-I";
644	}
645
646	options[current++] = "-K";
647	options[current++] = "" + m_numUsedAttributes;
648	options[current++] = "-T";
649	options[current++] = "" + m_linearSelectionType;
650
651	options[current++] = "-F";
652	options[current++] = "" + m_numFolds;
653	options[current++] = "-S";
654	options[current++] = "" + m_seed;
655
656	options[current++] = "-Z";
657	options[current++] = "" + m_verbose;
658
659	if (m_setSizeEval != null) {
660	options[current++] = "-E";
661	options[current++] = m_setSizeEval.getClass().getName();
662	}
663
664	options[current++] = "--";
665	System.arraycopy(evaluatorOptions, 0, options, current,
666	evaluatorOptions.length);
667	current += evaluatorOptions.length;
668
669	while (current < options.length) {
670	options[current++] = "";
671	}
672
673	return options;
674	}
675
676	/**
677	* returns a description of the search as a String
678	*
679	* @return a description of the search
680	*/
681	public String toString() {
682	StringBuffer LFSString = new StringBuffer();
683
684	LFSString.append("\tSubset Size Forward Selection.\n");
685
686	LFSString.append("\tLinear Forward Selection Type: ");
687
688	if (m_linearSelectionType == TYPE_FIXED_SET) {
689	LFSString.append("fixed-set\n");
690	} else {
691	LFSString.append("fixed-width\n");
692	}
693
694	LFSString.append("\tNumber of top-ranked attributes that are used: " +
695	m_numUsedAttributes + "\n");
696
697	LFSString.append(
698	"\tNumber of cross validation folds for subset size determination: " +
699	m_numFolds + "\n");
700	LFSString.append("\tSeed for cross validation subset size determination: " +
701	m_seed + "\n");
702
703	LFSString.append("\tTotal number of subsets evaluated: " + m_totalEvals +
704	"\n");
705	LFSString.append("\tMerit of best subset found: " +
706	Utils.doubleToString(Math.abs(m_bestMerit), 8, 3) + "\n");
707
708	return LFSString.toString();
709	}
710
711	/**
712	* Searches the attribute subset space by subset size forward selection
713	*
714	* @param ASEval
715	* the attribute evaluator to guide the search
716	* @param data
717	* the training instances.
718	* @return an array (not necessarily ordered) of selected attribute indexes
719	* @exception Exception
720	* if the search can't be completed
721	*/
722	public int[] search(ASEvaluation ASEval, Instances data)
723	throws Exception {
724	m_totalEvals = 0;
725
726	if (!(ASEval instanceof SubsetEvaluator)) {
727	throw new Exception(ASEval.getClass().getName() + " is not a " +
728	"Subset evaluator!");
729	}
730
731	if (m_setSizeEval == null) {
732	m_setSizeEval = ASEval;
733	}
734
735	m_numAttribs = data.numAttributes();
736
737	if (m_numUsedAttributes > m_numAttribs) {
738	System.out.println(
739	"Decreasing number of top-ranked attributes to total number of attributes: " +
740	data.numAttributes());
741	m_numUsedAttributes = m_numAttribs;
742	}
743
744	Instances[] trainData = new Instances[m_numFolds];
745	Instances[] testData = new Instances[m_numFolds];
746	LFSMethods[] searchResults = new LFSMethods[m_numFolds];
747
748	Random random = new Random(m_seed);
749	Instances dataCopy = new Instances(data);
750	dataCopy.randomize(random);
751
752	if (dataCopy.classAttribute().isNominal()) {
753	dataCopy.stratify(m_numFolds);
754	}
755
756	for (int f = 0; f < m_numFolds; f++) {
757	trainData[f] = dataCopy.trainCV(m_numFolds, f, random);
758	testData[f] = dataCopy.testCV(m_numFolds, f);
759	}
760
761	LFSMethods LSF = new LFSMethods();
762
763	int[] ranking;
764
765	if (m_performRanking) {
766	ASEval.buildEvaluator(data);
767	ranking = LSF.rankAttributes(data, (SubsetEvaluator) ASEval, m_verbose);
768	} else {
769	ranking = new int[m_numAttribs];
770
771	for (int i = 0; i < ranking.length; i++) {
772	ranking[i] = i;
773	}
774	}
775
776	int maxSubsetSize = 0;
777
778	for (int f = 0; f < m_numFolds; f++) {
779	if (m_verbose) {
780	System.out.println("perform search on internal fold: " + (f + 1) + "/" +
781	m_numFolds);
782	}
783
784	m_setSizeEval.buildEvaluator(trainData[f]);
785	searchResults[f] = new LFSMethods();
786	searchResults[f].forwardSearch(m_cacheSize, new BitSet(m_numAttribs),
787	ranking, m_numUsedAttributes,
788	m_linearSelectionType == TYPE_FIXED_WIDTH, 1, -1, trainData[f],
789	(SubsetEvaluator)m_setSizeEval, m_verbose);
790
791	maxSubsetSize = Math.max(maxSubsetSize,
792	searchResults[f].getBestGroup().cardinality());
793	}
794
795	if (m_verbose) {
796	System.out.println(
797	"continue searches on internal folds to maxSubsetSize (" +
798	maxSubsetSize + ")");
799	}
800
801	for (int f = 0; f < m_numFolds; f++) {
802	if (m_verbose) {
803	System.out.print("perform search on internal fold: " + (f + 1) + "/" +
804	m_numFolds + " with starting set ");
805	LFSMethods.printGroup(searchResults[f].getBestGroup(),
806	trainData[f].numAttributes());
807	}
808
809	if (searchResults[f].getBestGroup().cardinality() < maxSubsetSize) {
810	m_setSizeEval.buildEvaluator(trainData[f]);
811	searchResults[f].forwardSearch(m_cacheSize,
812	searchResults[f].getBestGroup(), ranking, m_numUsedAttributes,
813	m_linearSelectionType == TYPE_FIXED_WIDTH, 1, maxSubsetSize,
814	trainData[f], (SubsetEvaluator)m_setSizeEval, m_verbose);
815	}
816	}
817
818	double[][] testMerit = new double[m_numFolds][maxSubsetSize + 1];
819
820	for (int f = 0; f < m_numFolds; f++) {
821	for (int s = 1; s <= maxSubsetSize; s++) {
822	if (HoldOutSubsetEvaluator.class.isInstance(m_setSizeEval)) {
823	m_setSizeEval.buildEvaluator(trainData[f]);
824	testMerit[f][s] = ((HoldOutSubsetEvaluator) m_setSizeEval).evaluateSubset(searchResults[f].getBestGroupOfSize(
825	s), testData[f]);
826	} else {
827	m_setSizeEval.buildEvaluator(testData[f]);
828	testMerit[f][s] = ((SubsetEvaluator)m_setSizeEval).evaluateSubset(searchResults[f].getBestGroupOfSize(
829	s));
830	}
831	}
832	}
833
834	double[] avgTestMerit = new double[maxSubsetSize + 1];
835	int finalSubsetSize = -1;
836
837	for (int s = 1; s <= maxSubsetSize; s++) {
838	for (int f = 0; f < m_numFolds; f++) {
839	avgTestMerit[s] = ((avgTestMerit[s] * f) + testMerit[f][s]) / (double) (f +
840	1);
841	}
842
843	if ((finalSubsetSize == -1) \|\|
844	(avgTestMerit[s] > avgTestMerit[finalSubsetSize])) {
845	finalSubsetSize = s;
846	}
847
848	if (m_verbose) {
849	System.out.println("average merit for subset-size " + s + ": " +
850	avgTestMerit[s]);
851	}
852	}
853
854	if (m_verbose) {
855	System.out.println("performing final forward selection to subset-size: " +
856	finalSubsetSize);
857	}
858
859	ASEval.buildEvaluator(data);
860	LSF.forwardSearch(m_cacheSize, new BitSet(m_numAttribs), ranking,
861	m_numUsedAttributes, m_linearSelectionType == TYPE_FIXED_WIDTH, 1,
862	finalSubsetSize, data, (SubsetEvaluator) ASEval, m_verbose);
863
864	m_totalEvals = LSF.getNumEvalsTotal();
865	m_bestMerit = LSF.getBestMerit();
866
867	return attributeList(LSF.getBestGroup());
868	}
869
870	/**
871	* Reset options to default values
872	*/
873	protected void resetOptions() {
874	m_performRanking = true;
875	m_numUsedAttributes = 50;
876	m_linearSelectionType = TYPE_FIXED_SET;
877	m_setSizeEval = new ClassifierSubsetEval();
878	m_numFolds = 5;
879	m_seed = 1;
880	m_totalEvals = 0;
881	m_cacheSize = 1;
882	m_verbose = false;
883	}
884
885	/**
886	* converts a BitSet into a list of attribute indexes
887	*
888	* @param group
889	* the BitSet to convert
890	* @return an array of attribute indexes
891	*/
892	protected int[] attributeList(BitSet group) {
893	int count = 0;
894
895	// count how many were selected
896	for (int i = 0; i < m_numAttribs; i++) {
897	if (group.get(i)) {
898	count++;
899	}
900	}
901
902	int[] list = new int[count];
903	count = 0;
904
905	for (int i = 0; i < m_numAttribs; i++) {
906	if (group.get(i)) {
907	list[count++] = i;
908	}
909	}
910
911	return list;
912	}
913
914	/**
915	* Returns the revision string.
916	*
917	* @return the revision
918	*/
919	public String getRevision() {
920	return RevisionUtils.extract("$Revision: 5604 $");
921	}
922	}
923

Note: See TracBrowser for help on using the repository browser.

Download in other formats: