1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * LinearForwardSelection.java |
---|
19 | * Copyright (C) 2007 Martin Guetlein |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.attributeSelection; |
---|
24 | |
---|
25 | import weka.core.Instances; |
---|
26 | import weka.core.Option; |
---|
27 | import weka.core.OptionHandler; |
---|
28 | import weka.core.Range; |
---|
29 | import weka.core.RevisionUtils; |
---|
30 | import weka.core.SelectedTag; |
---|
31 | import weka.core.Tag; |
---|
32 | import weka.core.Utils; |
---|
33 | import weka.core.TechnicalInformation; |
---|
34 | import weka.core.TechnicalInformationHandler; |
---|
35 | import weka.core.TechnicalInformation.Field; |
---|
36 | import weka.core.TechnicalInformation.Type; |
---|
37 | import java.util.BitSet; |
---|
38 | import java.util.Enumeration; |
---|
39 | import java.util.Vector; |
---|
40 | |
---|
41 | |
---|
42 | /** |
---|
43 | <!-- globalinfo-start --> |
---|
44 | * LinearForwardSelection:<br/> |
---|
45 | * <br/> |
---|
46 | * Extension of BestFirst. Takes a restricted number of k attributes into account. Fixed-set selects a fixed number k of attributes, whereas k is increased in each step when fixed-width is selected. The search uses either the initial ordering to select the top k attributes, or performs a ranking (with the same evalutator the search uses later on). The search direction can be forward, or floating forward selection (with opitional backward search steps).<br/> |
---|
47 | * <br/> |
---|
48 | * For more information see:<br/> |
---|
49 | * <br/> |
---|
50 | * Martin Guetlein, Eibe Frank, Mark Hall, Andreas Karwath: Large Scale Attribute Selection Using Wrappers. In: Proc IEEE Symposium on Computational Intelligence and Data Mining, 332-339, 2009.<br/> |
---|
51 | * <br/> |
---|
52 | * Martin Guetlein (2006). Large Scale Attribute Selection Using Wrappers. Freiburg, Germany. |
---|
53 | * <p/> |
---|
54 | <!-- globalinfo-end --> |
---|
55 | * |
---|
56 | <!-- options-start --> |
---|
57 | * Valid options are: <p/> |
---|
58 | * |
---|
59 | * <pre> -P <start set> |
---|
60 | * Specify a starting set of attributes. |
---|
61 | * Eg. 1,3,5-7.</pre> |
---|
62 | * |
---|
63 | * <pre> -D <0 = forward selection | 1 = floating forward selection> |
---|
64 | * Forward selection method. (default = 0).</pre> |
---|
65 | * |
---|
66 | * <pre> -N <num> |
---|
67 | * Number of non-improving nodes to |
---|
68 | * consider before terminating search.</pre> |
---|
69 | * |
---|
70 | * <pre> -I |
---|
71 | * Perform initial ranking to select the |
---|
72 | * top-ranked attributes.</pre> |
---|
73 | * |
---|
74 | * <pre> -K <num> |
---|
75 | * Number of top-ranked attributes that are |
---|
76 | * taken into account by the search.</pre> |
---|
77 | * |
---|
78 | * <pre> -T <0 = fixed-set | 1 = fixed-width> |
---|
79 | * Type of Linear Forward Selection (default = 0).</pre> |
---|
80 | * |
---|
81 | * <pre> -S <num> |
---|
82 | * Size of lookup cache for evaluated subsets. |
---|
83 | * Expressed as a multiple of the number of |
---|
84 | * attributes in the data set. (default = 1)</pre> |
---|
85 | * |
---|
86 | * <pre> -Z |
---|
87 | * verbose on/off</pre> |
---|
88 | * |
---|
89 | <!-- options-end --> |
---|
90 | * |
---|
91 | * @author Martin Guetlein (martin.guetlein@gmail.com) |
---|
92 | * @version $Revision: 6160 $ |
---|
93 | */ |
---|
94 | public class LinearForwardSelection |
---|
95 | extends ASSearch |
---|
96 | implements OptionHandler, |
---|
97 | StartSetHandler, |
---|
98 | TechnicalInformationHandler { |
---|
99 | /** search directions */ |
---|
100 | protected static final int SEARCH_METHOD_FORWARD = 0; |
---|
101 | protected static final int SEARCH_METHOD_FLOATING = 1; |
---|
102 | public static final Tag[] TAGS_SEARCH_METHOD = { |
---|
103 | new Tag(SEARCH_METHOD_FORWARD, "Forward selection"), |
---|
104 | new Tag(SEARCH_METHOD_FLOATING, "Floating forward selection"), |
---|
105 | }; |
---|
106 | |
---|
107 | /** search directions */ |
---|
108 | protected static final int TYPE_FIXED_SET = 0; |
---|
109 | protected static final int TYPE_FIXED_WIDTH = 1; |
---|
110 | public static final Tag[] TAGS_TYPE = { |
---|
111 | new Tag(TYPE_FIXED_SET, "Fixed-set"), |
---|
112 | new Tag(TYPE_FIXED_WIDTH, "Fixed-width"), |
---|
113 | }; |
---|
114 | |
---|
115 | // member variables |
---|
116 | /** maximum number of stale nodes before terminating search */ |
---|
117 | protected int m_maxStale; |
---|
118 | |
---|
119 | /** 0 == forward selection, 1 == floating forward search */ |
---|
120 | protected int m_forwardSearchMethod; |
---|
121 | |
---|
122 | /** perform initial ranking to select top-ranked attributes */ |
---|
123 | protected boolean m_performRanking; |
---|
124 | |
---|
125 | /** |
---|
126 | * number of top-ranked attributes that are taken into account for the |
---|
127 | * search |
---|
128 | */ |
---|
129 | protected int m_numUsedAttributes; |
---|
130 | |
---|
131 | /** 0 == fixed-set, 1 == fixed-width */ |
---|
132 | protected int m_linearSelectionType; |
---|
133 | |
---|
134 | /** holds an array of starting attributes */ |
---|
135 | protected int[] m_starting; |
---|
136 | |
---|
137 | /** holds the start set for the search as a Range */ |
---|
138 | protected Range m_startRange; |
---|
139 | |
---|
140 | /** does the data have a class */ |
---|
141 | protected boolean m_hasClass; |
---|
142 | |
---|
143 | /** holds the class index */ |
---|
144 | protected int m_classIndex; |
---|
145 | |
---|
146 | /** number of attributes in the data */ |
---|
147 | protected int m_numAttribs; |
---|
148 | |
---|
149 | /** total number of subsets evaluated during a search */ |
---|
150 | protected int m_totalEvals; |
---|
151 | |
---|
152 | /** for debugging */ |
---|
153 | protected boolean m_verbose; |
---|
154 | |
---|
155 | /** holds the merit of the best subset found */ |
---|
156 | protected double m_bestMerit; |
---|
157 | |
---|
158 | /** holds the maximum size of the lookup cache for evaluated subsets */ |
---|
159 | protected int m_cacheSize; |
---|
160 | |
---|
161 | /** |
---|
162 | * Constructor |
---|
163 | */ |
---|
164 | public LinearForwardSelection() { |
---|
165 | resetOptions(); |
---|
166 | } |
---|
167 | |
---|
168 | /** |
---|
169 | * Returns a string describing this search method |
---|
170 | * |
---|
171 | * @return a description of the search method suitable for displaying in the |
---|
172 | * explorer/experimenter gui |
---|
173 | */ |
---|
174 | public String globalInfo() { |
---|
175 | return "LinearForwardSelection:\n\n" + |
---|
176 | "Extension of BestFirst. Takes a restricted number of k attributes " + |
---|
177 | "into account. Fixed-set selects a fixed number k of attributes, " + |
---|
178 | "whereas k is increased in each step when fixed-width is selected. " + |
---|
179 | "The search uses either the initial ordering to select the " + |
---|
180 | "top k attributes, or performs a ranking (with the same evalutator the " + |
---|
181 | "search uses later on). The search direction can be forward, " + |
---|
182 | "or floating forward selection (with opitional backward search steps).\n\n" |
---|
183 | + "For more information see:\n\n" |
---|
184 | + getTechnicalInformation().toString(); |
---|
185 | } |
---|
186 | |
---|
187 | /** |
---|
188 | * Returns an instance of a TechnicalInformation object, containing |
---|
189 | * detailed information about the technical background of this class, |
---|
190 | * e.g., paper reference or book this class is based on. |
---|
191 | * |
---|
192 | * @return the technical information about this class |
---|
193 | */ |
---|
194 | public TechnicalInformation getTechnicalInformation() { |
---|
195 | TechnicalInformation result; |
---|
196 | TechnicalInformation additional; |
---|
197 | |
---|
198 | result = new TechnicalInformation(Type.INPROCEEDINGS); |
---|
199 | result.setValue(Field.AUTHOR, "Martin Guetlein and Eibe Frank and Mark Hall and Andreas Karwath"); |
---|
200 | result.setValue(Field.YEAR, "2009"); |
---|
201 | result.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers"); |
---|
202 | result.setValue(Field.BOOKTITLE, "Proc IEEE Symposium on Computational Intelligence and Data Mining"); |
---|
203 | result.setValue(Field.PAGES, "332-339"); |
---|
204 | result.setValue(Field.PUBLISHER, "IEEE"); |
---|
205 | |
---|
206 | additional = result.add(Type.MASTERSTHESIS); |
---|
207 | additional.setValue(Field.AUTHOR, "Martin Guetlein"); |
---|
208 | additional.setValue(Field.YEAR, "2006"); |
---|
209 | additional.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers"); |
---|
210 | additional.setValue(Field.SCHOOL, "Albert-Ludwigs-Universitaet"); |
---|
211 | additional.setValue(Field.ADDRESS, "Freiburg, Germany"); |
---|
212 | |
---|
213 | return result; |
---|
214 | } |
---|
215 | |
---|
216 | /** |
---|
217 | * Returns an enumeration describing the available options. |
---|
218 | * |
---|
219 | * @return an enumeration of all the available options. |
---|
220 | * |
---|
221 | */ |
---|
222 | public Enumeration listOptions() { |
---|
223 | Vector newVector = new Vector(8); |
---|
224 | |
---|
225 | newVector.addElement(new Option("\tSpecify a starting set of attributes." + |
---|
226 | "\n\tEg. 1,3,5-7.", "P", 1, "-P <start set>")); |
---|
227 | newVector.addElement(new Option( |
---|
228 | "\tForward selection method. (default = 0).", "D", 1, |
---|
229 | "-D <0 = forward selection | 1 = floating forward selection>")); |
---|
230 | newVector.addElement(new Option("\tNumber of non-improving nodes to" + |
---|
231 | "\n\tconsider before terminating search.", "N", 1, "-N <num>")); |
---|
232 | newVector.addElement(new Option("\tPerform initial ranking to select the" + |
---|
233 | "\n\ttop-ranked attributes.", "I", 0, "-I")); |
---|
234 | newVector.addElement(new Option( |
---|
235 | "\tNumber of top-ranked attributes that are " + |
---|
236 | "\n\ttaken into account by the search.", "K", 1, "-K <num>")); |
---|
237 | newVector.addElement(new Option( |
---|
238 | "\tType of Linear Forward Selection (default = 0).", "T", 1, |
---|
239 | "-T <0 = fixed-set | 1 = fixed-width>")); |
---|
240 | newVector.addElement(new Option( |
---|
241 | "\tSize of lookup cache for evaluated subsets." + |
---|
242 | "\n\tExpressed as a multiple of the number of" + |
---|
243 | "\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>")); |
---|
244 | newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z")); |
---|
245 | |
---|
246 | return newVector.elements(); |
---|
247 | } |
---|
248 | |
---|
249 | /** |
---|
250 | * Parses a given list of options. |
---|
251 | * |
---|
252 | * Valid options are: |
---|
253 | * <p> |
---|
254 | * |
---|
255 | * -P <start set> <br> |
---|
256 | * Specify a starting set of attributes. Eg 1,4,7-9. |
---|
257 | * <p> |
---|
258 | * |
---|
259 | * -D <0 = forward selection | 1 = floating forward selection> <br> |
---|
260 | * Forward selection method of the search. (default = 0). |
---|
261 | * <p> |
---|
262 | * |
---|
263 | * -N <num> <br> |
---|
264 | * Number of non improving nodes to consider before terminating search. |
---|
265 | * (default = 5). |
---|
266 | * <p> |
---|
267 | * |
---|
268 | * -I <br> |
---|
269 | * Perform initial ranking to select top-ranked attributes. |
---|
270 | * <p> |
---|
271 | * |
---|
272 | * -K <num> <br> |
---|
273 | * Number of top-ranked attributes that are taken into account. |
---|
274 | * <p> |
---|
275 | * |
---|
276 | * -T <0 = fixed-set | 1 = fixed-width> <br> |
---|
277 | * Typ of Linear Forward Selection (default = 0). |
---|
278 | * <p> |
---|
279 | * |
---|
280 | * -S <num> <br> |
---|
281 | * Size of lookup cache for evaluated subsets. Expressed as a multiple of |
---|
282 | * the number of attributes in the data set. (default = 1). |
---|
283 | * <p> |
---|
284 | * |
---|
285 | * -Z <br> |
---|
286 | * verbose on/off. |
---|
287 | * <p> |
---|
288 | * |
---|
289 | * @param options |
---|
290 | * the list of options as an array of strings |
---|
291 | * @exception Exception |
---|
292 | * if an option is not supported |
---|
293 | * |
---|
294 | */ |
---|
295 | public void setOptions(String[] options) throws Exception { |
---|
296 | String optionString; |
---|
297 | resetOptions(); |
---|
298 | |
---|
299 | optionString = Utils.getOption('P', options); |
---|
300 | |
---|
301 | if (optionString.length() != 0) { |
---|
302 | setStartSet(optionString); |
---|
303 | } |
---|
304 | |
---|
305 | optionString = Utils.getOption('D', options); |
---|
306 | |
---|
307 | if (optionString.length() != 0) { |
---|
308 | setForwardSelectionMethod(new SelectedTag(Integer.parseInt(optionString), |
---|
309 | TAGS_SEARCH_METHOD)); |
---|
310 | } else { |
---|
311 | setForwardSelectionMethod(new SelectedTag(SEARCH_METHOD_FORWARD, |
---|
312 | TAGS_SEARCH_METHOD)); |
---|
313 | } |
---|
314 | |
---|
315 | optionString = Utils.getOption('N', options); |
---|
316 | |
---|
317 | if (optionString.length() != 0) { |
---|
318 | setSearchTermination(Integer.parseInt(optionString)); |
---|
319 | } |
---|
320 | |
---|
321 | setPerformRanking(Utils.getFlag('I', options)); |
---|
322 | |
---|
323 | optionString = Utils.getOption('K', options); |
---|
324 | |
---|
325 | if (optionString.length() != 0) { |
---|
326 | setNumUsedAttributes(Integer.parseInt(optionString)); |
---|
327 | } |
---|
328 | |
---|
329 | optionString = Utils.getOption('T', options); |
---|
330 | |
---|
331 | if (optionString.length() != 0) { |
---|
332 | setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE)); |
---|
333 | } else { |
---|
334 | setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE)); |
---|
335 | } |
---|
336 | |
---|
337 | optionString = Utils.getOption('S', options); |
---|
338 | |
---|
339 | if (optionString.length() != 0) { |
---|
340 | setLookupCacheSize(Integer.parseInt(optionString)); |
---|
341 | } |
---|
342 | |
---|
343 | m_verbose = Utils.getFlag('Z', options); |
---|
344 | } |
---|
345 | |
---|
346 | /** |
---|
347 | * Set the maximum size of the evaluated subset cache (hashtable). This is |
---|
348 | * expressed as a multiplier for the number of attributes in the data set. |
---|
349 | * (default = 1). |
---|
350 | * |
---|
351 | * @param size |
---|
352 | * the maximum size of the hashtable |
---|
353 | */ |
---|
354 | public void setLookupCacheSize(int size) { |
---|
355 | if (size >= 0) { |
---|
356 | m_cacheSize = size; |
---|
357 | } |
---|
358 | } |
---|
359 | |
---|
360 | /** |
---|
361 | * Return the maximum size of the evaluated subset cache (expressed as a |
---|
362 | * multiplier for the number of attributes in a data set. |
---|
363 | * |
---|
364 | * @return the maximum size of the hashtable. |
---|
365 | */ |
---|
366 | public int getLookupCacheSize() { |
---|
367 | return m_cacheSize; |
---|
368 | } |
---|
369 | |
---|
370 | /** |
---|
371 | * Returns the tip text for this property |
---|
372 | * |
---|
373 | * @return tip text for this property suitable for displaying in the |
---|
374 | * explorer/experimenter gui |
---|
375 | */ |
---|
376 | public String lookupCacheSizeTipText() { |
---|
377 | return "Set the maximum size of the lookup cache of evaluated subsets. This is " + |
---|
378 | "expressed as a multiplier of the number of attributes in the data set. " + |
---|
379 | "(default = 1)."; |
---|
380 | } |
---|
381 | |
---|
382 | /** |
---|
383 | * Returns the tip text for this property |
---|
384 | * |
---|
385 | * @return tip text for this property suitable for displaying in the |
---|
386 | * explorer/experimenter gui |
---|
387 | */ |
---|
388 | public String startSetTipText() { |
---|
389 | return "Set the start point for the search. This is specified as a comma " + |
---|
390 | "seperated list off attribute indexes starting at 1. It can include " + |
---|
391 | "ranges. Eg. 1,2,5-9,17."; |
---|
392 | } |
---|
393 | |
---|
394 | /** |
---|
395 | * Sets a starting set of attributes for the search. It is the search |
---|
396 | * method's responsibility to report this start set (if any) in its |
---|
397 | * toString() method. |
---|
398 | * |
---|
399 | * @param startSet |
---|
400 | * a string containing a list of attributes (and or ranges), eg. |
---|
401 | * 1,2,6,10-15. |
---|
402 | * @exception Exception |
---|
403 | * if start set can't be set. |
---|
404 | */ |
---|
405 | public void setStartSet(String startSet) throws Exception { |
---|
406 | m_startRange.setRanges(startSet); |
---|
407 | } |
---|
408 | |
---|
409 | /** |
---|
410 | * Returns a list of attributes (and or attribute ranges) as a String |
---|
411 | * |
---|
412 | * @return a list of attributes (and or attribute ranges) |
---|
413 | */ |
---|
414 | public String getStartSet() { |
---|
415 | return m_startRange.getRanges(); |
---|
416 | } |
---|
417 | |
---|
418 | /** |
---|
419 | * Returns the tip text for this property |
---|
420 | * |
---|
421 | * @return tip text for this property suitable for displaying in the |
---|
422 | * explorer/experimenter gui |
---|
423 | */ |
---|
424 | public String searchTerminationTipText() { |
---|
425 | return "Set the amount of backtracking. Specify the number of "; |
---|
426 | } |
---|
427 | |
---|
428 | /** |
---|
429 | * Set the numnber of non-improving nodes to consider before terminating |
---|
430 | * search. |
---|
431 | * |
---|
432 | * @param t |
---|
433 | * the number of non-improving nodes |
---|
434 | * @exception Exception |
---|
435 | * if t is less than 1 |
---|
436 | */ |
---|
437 | public void setSearchTermination(int t) throws Exception { |
---|
438 | if (t < 1) { |
---|
439 | throw new Exception("Value of -N must be > 0."); |
---|
440 | } |
---|
441 | |
---|
442 | m_maxStale = t; |
---|
443 | } |
---|
444 | |
---|
445 | /** |
---|
446 | * Get the termination criterion (number of non-improving nodes). |
---|
447 | * |
---|
448 | * @return the number of non-improving nodes |
---|
449 | */ |
---|
450 | public int getSearchTermination() { |
---|
451 | return m_maxStale; |
---|
452 | } |
---|
453 | |
---|
454 | /** |
---|
455 | * Returns the tip text for this property |
---|
456 | * |
---|
457 | * @return tip text for this property suitable for displaying in the |
---|
458 | * explorer/experimenter gui |
---|
459 | */ |
---|
460 | public String performRankingTipText() { |
---|
461 | return "Perform initial ranking to select top-ranked attributes."; |
---|
462 | } |
---|
463 | |
---|
464 | /** |
---|
465 | * Perform initial ranking to select top-ranked attributes. |
---|
466 | * |
---|
467 | * @param b |
---|
468 | * true if initial ranking should be performed |
---|
469 | */ |
---|
470 | public void setPerformRanking(boolean b) { |
---|
471 | m_performRanking = b; |
---|
472 | } |
---|
473 | |
---|
474 | /** |
---|
475 | * Get boolean if initial ranking should be performed to select the |
---|
476 | * top-ranked attributes |
---|
477 | * |
---|
478 | * @return true if initial ranking should be performed |
---|
479 | */ |
---|
480 | public boolean getPerformRanking() { |
---|
481 | return m_performRanking; |
---|
482 | } |
---|
483 | |
---|
484 | /** |
---|
485 | * Returns the tip text for this property |
---|
486 | * |
---|
487 | * @return tip text for this property suitable for displaying in the |
---|
488 | * explorer/experimenter gui |
---|
489 | */ |
---|
490 | public String numUsedAttributesTipText() { |
---|
491 | return "Set the amount of top-ranked attributes that are taken into account by the search process."; |
---|
492 | } |
---|
493 | |
---|
494 | /** |
---|
495 | * Set the number of top-ranked attributes that taken into account by the |
---|
496 | * search process. |
---|
497 | * |
---|
498 | * @param k |
---|
499 | * the number of attributes |
---|
500 | * @exception Exception |
---|
501 | * if k is less than 2 |
---|
502 | */ |
---|
503 | public void setNumUsedAttributes(int k) throws Exception { |
---|
504 | if (k < 2) { |
---|
505 | throw new Exception("Value of -K must be >= 2."); |
---|
506 | } |
---|
507 | |
---|
508 | m_numUsedAttributes = k; |
---|
509 | } |
---|
510 | |
---|
511 | /** |
---|
512 | * Get the number of top-ranked attributes that taken into account by the |
---|
513 | * search process. |
---|
514 | * |
---|
515 | * @return the number of top-ranked attributes that taken into account |
---|
516 | */ |
---|
517 | public int getNumUsedAttributes() { |
---|
518 | return m_numUsedAttributes; |
---|
519 | } |
---|
520 | |
---|
521 | /** |
---|
522 | * Returns the tip text for this property |
---|
523 | * |
---|
524 | * @return tip text for this property suitable for displaying in the |
---|
525 | * explorer/experimenter gui |
---|
526 | */ |
---|
527 | public String forwardSelectionMethodTipText() { |
---|
528 | return "Set the direction of the search."; |
---|
529 | } |
---|
530 | |
---|
531 | /** |
---|
532 | * Set the search direction |
---|
533 | * |
---|
534 | * @param d |
---|
535 | * the direction of the search |
---|
536 | */ |
---|
537 | public void setForwardSelectionMethod(SelectedTag d) { |
---|
538 | if (d.getTags() == TAGS_SEARCH_METHOD) { |
---|
539 | m_forwardSearchMethod = d.getSelectedTag().getID(); |
---|
540 | } |
---|
541 | } |
---|
542 | |
---|
543 | /** |
---|
544 | * Get the search direction |
---|
545 | * |
---|
546 | * @return the direction of the search |
---|
547 | */ |
---|
548 | public SelectedTag getForwardSelectionMethod() { |
---|
549 | return new SelectedTag(m_forwardSearchMethod, TAGS_SEARCH_METHOD); |
---|
550 | } |
---|
551 | |
---|
552 | /** |
---|
553 | * Returns the tip text for this property |
---|
554 | * |
---|
555 | * @return tip text for this property suitable for displaying in the |
---|
556 | * explorer/experimenter gui |
---|
557 | */ |
---|
558 | public String typeTipText() { |
---|
559 | return "Set the type of the search."; |
---|
560 | } |
---|
561 | |
---|
562 | /** |
---|
563 | * Set the type |
---|
564 | * |
---|
565 | * @param t |
---|
566 | * the Linear Forward Selection type |
---|
567 | */ |
---|
568 | public void setType(SelectedTag t) { |
---|
569 | if (t.getTags() == TAGS_TYPE) { |
---|
570 | m_linearSelectionType = t.getSelectedTag().getID(); |
---|
571 | } |
---|
572 | } |
---|
573 | |
---|
574 | /** |
---|
575 | * Get the type |
---|
576 | * |
---|
577 | * @return the Linear Forward Selection type |
---|
578 | */ |
---|
579 | public SelectedTag getType() { |
---|
580 | return new SelectedTag(m_linearSelectionType, TAGS_TYPE); |
---|
581 | } |
---|
582 | |
---|
583 | /** |
---|
584 | * Returns the tip text for this property |
---|
585 | * |
---|
586 | * @return tip text for this property suitable for displaying in the |
---|
587 | * explorer/experimenter gui |
---|
588 | */ |
---|
589 | public String verboseTipText() { |
---|
590 | return "Turn on verbose output for monitoring the search's progress."; |
---|
591 | } |
---|
592 | |
---|
593 | /** |
---|
594 | * Set whether verbose output should be generated. |
---|
595 | * |
---|
596 | * @param b |
---|
597 | * true if output is to be verbose. |
---|
598 | */ |
---|
599 | public void setVerbose(boolean b) { |
---|
600 | m_verbose = b; |
---|
601 | } |
---|
602 | |
---|
603 | /** |
---|
604 | * Get whether output is to be verbose |
---|
605 | * |
---|
606 | * @return true if output will be verbose |
---|
607 | */ |
---|
608 | public boolean getVerbose() { |
---|
609 | return m_verbose; |
---|
610 | } |
---|
611 | |
---|
612 | /** |
---|
613 | * Gets the current settings of LinearForwardSelection. |
---|
614 | * |
---|
615 | * @return an array of strings suitable for passing to setOptions() |
---|
616 | */ |
---|
617 | public String[] getOptions() { |
---|
618 | String[] options = new String[13]; |
---|
619 | int current = 0; |
---|
620 | |
---|
621 | if (!(getStartSet().equals(""))) { |
---|
622 | options[current++] = "-P"; |
---|
623 | options[current++] = "" + startSetToString(); |
---|
624 | } |
---|
625 | |
---|
626 | options[current++] = "-D"; |
---|
627 | options[current++] = "" + m_forwardSearchMethod; |
---|
628 | options[current++] = "-N"; |
---|
629 | options[current++] = "" + m_maxStale; |
---|
630 | |
---|
631 | if (m_performRanking) { |
---|
632 | options[current++] = "-I"; |
---|
633 | } |
---|
634 | |
---|
635 | options[current++] = "-K"; |
---|
636 | options[current++] = "" + m_numUsedAttributes; |
---|
637 | options[current++] = "-T"; |
---|
638 | options[current++] = "" + m_linearSelectionType; |
---|
639 | |
---|
640 | if (m_verbose) |
---|
641 | options[current++] = "-Z"; |
---|
642 | |
---|
643 | while (current < options.length) { |
---|
644 | options[current++] = ""; |
---|
645 | } |
---|
646 | |
---|
647 | return options; |
---|
648 | } |
---|
649 | |
---|
650 | /** |
---|
651 | * converts the array of starting attributes to a string. This is used by |
---|
652 | * getOptions to return the actual attributes specified as the starting set. |
---|
653 | * This is better than using m_startRanges.getRanges() as the same start set |
---|
654 | * can be specified in different ways from the command line---eg 1,2,3 == |
---|
655 | * 1-3. This is to ensure that stuff that is stored in a database is |
---|
656 | * comparable. |
---|
657 | * |
---|
658 | * @return a comma seperated list of individual attribute numbers as a |
---|
659 | * String |
---|
660 | */ |
---|
661 | private String startSetToString() { |
---|
662 | StringBuffer FString = new StringBuffer(); |
---|
663 | boolean didPrint; |
---|
664 | |
---|
665 | if (m_starting == null) { |
---|
666 | return getStartSet(); |
---|
667 | } |
---|
668 | |
---|
669 | for (int i = 0; i < m_starting.length; i++) { |
---|
670 | didPrint = false; |
---|
671 | |
---|
672 | if ((m_hasClass == false) || |
---|
673 | ((m_hasClass == true) && (i != m_classIndex))) { |
---|
674 | FString.append((m_starting[i] + 1)); |
---|
675 | didPrint = true; |
---|
676 | } |
---|
677 | |
---|
678 | if (i == (m_starting.length - 1)) { |
---|
679 | FString.append(""); |
---|
680 | } else { |
---|
681 | if (didPrint) { |
---|
682 | FString.append(","); |
---|
683 | } |
---|
684 | } |
---|
685 | } |
---|
686 | |
---|
687 | return FString.toString(); |
---|
688 | } |
---|
689 | |
---|
690 | /** |
---|
691 | * returns a description of the search as a String |
---|
692 | * |
---|
693 | * @return a description of the search |
---|
694 | */ |
---|
695 | public String toString() { |
---|
696 | StringBuffer LFSString = new StringBuffer(); |
---|
697 | |
---|
698 | LFSString.append("\tLinear Forward Selection.\n\tStart set: "); |
---|
699 | |
---|
700 | if (m_starting == null) { |
---|
701 | LFSString.append("no attributes\n"); |
---|
702 | } else { |
---|
703 | LFSString.append(startSetToString() + "\n"); |
---|
704 | } |
---|
705 | |
---|
706 | LFSString.append("\tForward selection method: "); |
---|
707 | |
---|
708 | if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) { |
---|
709 | LFSString.append("forward selection\n"); |
---|
710 | } else { |
---|
711 | LFSString.append("floating forward selection\n"); |
---|
712 | } |
---|
713 | |
---|
714 | LFSString.append("\tStale search after " + m_maxStale + |
---|
715 | " node expansions\n"); |
---|
716 | |
---|
717 | LFSString.append("\tLinear Forward Selection Type: "); |
---|
718 | |
---|
719 | if (m_linearSelectionType == TYPE_FIXED_SET) { |
---|
720 | LFSString.append("fixed-set\n"); |
---|
721 | } else { |
---|
722 | LFSString.append("fixed-width\n"); |
---|
723 | } |
---|
724 | |
---|
725 | LFSString.append("\tNumber of top-ranked attributes that are used: " + |
---|
726 | m_numUsedAttributes + "\n"); |
---|
727 | |
---|
728 | LFSString.append("\tTotal number of subsets evaluated: " + m_totalEvals + |
---|
729 | "\n"); |
---|
730 | LFSString.append("\tMerit of best subset found: " + |
---|
731 | Utils.doubleToString(Math.abs(m_bestMerit), 8, 3) + "\n"); |
---|
732 | |
---|
733 | return LFSString.toString(); |
---|
734 | } |
---|
735 | |
---|
736 | /** |
---|
737 | * Searches the attribute subset space by linear forward selection |
---|
738 | * |
---|
739 | * @param ASEval |
---|
740 | * the attribute evaluator to guide the search |
---|
741 | * @param data |
---|
742 | * the training instances. |
---|
743 | * @return an array (not necessarily ordered) of selected attribute indexes |
---|
744 | * @exception Exception |
---|
745 | * if the search can't be completed |
---|
746 | */ |
---|
747 | public int[] search(ASEvaluation ASEval, Instances data) |
---|
748 | throws Exception { |
---|
749 | m_totalEvals = 0; |
---|
750 | |
---|
751 | if (!(ASEval instanceof SubsetEvaluator)) { |
---|
752 | throw new Exception(ASEval.getClass().getName() + " is not a " + |
---|
753 | "Subset evaluator!"); |
---|
754 | } |
---|
755 | |
---|
756 | if (ASEval instanceof UnsupervisedSubsetEvaluator) { |
---|
757 | m_hasClass = false; |
---|
758 | } else { |
---|
759 | m_hasClass = true; |
---|
760 | m_classIndex = data.classIndex(); |
---|
761 | } |
---|
762 | |
---|
763 | ((ASEvaluation) ASEval).buildEvaluator(data); |
---|
764 | |
---|
765 | m_numAttribs = data.numAttributes(); |
---|
766 | |
---|
767 | if (m_numUsedAttributes > m_numAttribs) { |
---|
768 | System.out.println( |
---|
769 | "Decreasing number of top-ranked attributes to total number of attributes: " + |
---|
770 | data.numAttributes()); |
---|
771 | m_numUsedAttributes = m_numAttribs; |
---|
772 | } |
---|
773 | |
---|
774 | BitSet start_group = new BitSet(m_numAttribs); |
---|
775 | m_startRange.setUpper(m_numAttribs - 1); |
---|
776 | |
---|
777 | if (!(getStartSet().equals(""))) { |
---|
778 | m_starting = m_startRange.getSelection(); |
---|
779 | } |
---|
780 | |
---|
781 | // If a starting subset has been supplied, then initialise the bitset |
---|
782 | if (m_starting != null) { |
---|
783 | for (int i = 0; i < m_starting.length; i++) { |
---|
784 | if ((m_starting[i]) != m_classIndex) { |
---|
785 | start_group.set(m_starting[i]); |
---|
786 | } |
---|
787 | } |
---|
788 | } |
---|
789 | |
---|
790 | LFSMethods LFS = new LFSMethods(); |
---|
791 | |
---|
792 | int[] ranking; |
---|
793 | |
---|
794 | if (m_performRanking) { |
---|
795 | ranking = LFS.rankAttributes(data, (SubsetEvaluator) ASEval, m_verbose); |
---|
796 | } else { |
---|
797 | ranking = new int[m_numAttribs]; |
---|
798 | |
---|
799 | for (int i = 0; i < ranking.length; i++) { |
---|
800 | ranking[i] = i; |
---|
801 | } |
---|
802 | } |
---|
803 | |
---|
804 | if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) { |
---|
805 | LFS.forwardSearch(m_cacheSize, start_group, ranking, m_numUsedAttributes, |
---|
806 | m_linearSelectionType == TYPE_FIXED_WIDTH, m_maxStale, -1, data, |
---|
807 | (SubsetEvaluator) ASEval, m_verbose); |
---|
808 | } else if (m_forwardSearchMethod == SEARCH_METHOD_FLOATING) { |
---|
809 | LFS.floatingForwardSearch(m_cacheSize, start_group, ranking, |
---|
810 | m_numUsedAttributes, m_linearSelectionType == TYPE_FIXED_WIDTH, |
---|
811 | m_maxStale, data, (SubsetEvaluator) ASEval, m_verbose); |
---|
812 | } |
---|
813 | |
---|
814 | m_totalEvals = LFS.getNumEvalsTotal(); |
---|
815 | m_bestMerit = LFS.getBestMerit(); |
---|
816 | |
---|
817 | return attributeList(LFS.getBestGroup()); |
---|
818 | } |
---|
819 | |
---|
820 | /** |
---|
821 | * Reset options to default values |
---|
822 | */ |
---|
823 | protected void resetOptions() { |
---|
824 | m_maxStale = 5; |
---|
825 | m_forwardSearchMethod = SEARCH_METHOD_FORWARD; |
---|
826 | m_performRanking = true; |
---|
827 | m_numUsedAttributes = 50; |
---|
828 | m_linearSelectionType = TYPE_FIXED_SET; |
---|
829 | m_starting = null; |
---|
830 | m_startRange = new Range(); |
---|
831 | m_classIndex = -1; |
---|
832 | m_totalEvals = 0; |
---|
833 | m_cacheSize = 1; |
---|
834 | m_verbose = false; |
---|
835 | } |
---|
836 | |
---|
837 | /** |
---|
838 | * converts a BitSet into a list of attribute indexes |
---|
839 | * |
---|
840 | * @param group |
---|
841 | * the BitSet to convert |
---|
842 | * @return an array of attribute indexes |
---|
843 | */ |
---|
844 | protected int[] attributeList(BitSet group) { |
---|
845 | int count = 0; |
---|
846 | |
---|
847 | // count how many were selected |
---|
848 | for (int i = 0; i < m_numAttribs; i++) { |
---|
849 | if (group.get(i)) { |
---|
850 | count++; |
---|
851 | } |
---|
852 | } |
---|
853 | |
---|
854 | int[] list = new int[count]; |
---|
855 | count = 0; |
---|
856 | |
---|
857 | for (int i = 0; i < m_numAttribs; i++) { |
---|
858 | if (group.get(i)) { |
---|
859 | list[count++] = i; |
---|
860 | } |
---|
861 | } |
---|
862 | |
---|
863 | return list; |
---|
864 | } |
---|
865 | |
---|
866 | /** |
---|
867 | * Returns the revision string. |
---|
868 | * |
---|
869 | * @return the revision |
---|
870 | */ |
---|
871 | public String getRevision() { |
---|
872 | return RevisionUtils.extract("$Revision: 6160 $"); |
---|
873 | } |
---|
874 | } |
---|
875 | |
---|