1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * GreedyStepwise.java |
---|
19 | * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.attributeSelection; |
---|
24 | |
---|
25 | import weka.core.Instances; |
---|
26 | import weka.core.Option; |
---|
27 | import weka.core.OptionHandler; |
---|
28 | import weka.core.Range; |
---|
29 | import weka.core.RevisionUtils; |
---|
30 | import weka.core.Utils; |
---|
31 | |
---|
32 | import java.util.BitSet; |
---|
33 | import java.util.Enumeration; |
---|
34 | import java.util.Vector; |
---|
35 | |
---|
36 | /** |
---|
37 | <!-- globalinfo-start --> |
---|
38 | * GreedyStepwise :<br/> |
---|
39 | * <br/> |
---|
40 | * Performs a greedy forward or backward search through the space of attribute subsets. May start with no/all attributes or from an arbitrary point in the space. Stops when the addition/deletion of any remaining attributes results in a decrease in evaluation. Can also produce a ranked list of attributes by traversing the space from one side to the other and recording the order that attributes are selected.<br/> |
---|
41 | * <p/> |
---|
42 | <!-- globalinfo-end --> |
---|
43 | * |
---|
44 | <!-- options-start --> |
---|
45 | * Valid options are: <p/> |
---|
46 | * |
---|
47 | * <pre> -C |
---|
48 | * Use conservative forward search</pre> |
---|
49 | * |
---|
50 | * <pre> -B |
---|
51 | * Use a backward search instead of a |
---|
52 | * forward one.</pre> |
---|
53 | * |
---|
54 | * <pre> -P <start set> |
---|
55 | * Specify a starting set of attributes. |
---|
56 | * Eg. 1,3,5-7.</pre> |
---|
57 | * |
---|
58 | * <pre> -R |
---|
59 | * Produce a ranked list of attributes.</pre> |
---|
60 | * |
---|
61 | * <pre> -T <threshold> |
---|
62 | * Specify a threshold by which attributes |
---|
63 | * may be discarded from the ranking. |
---|
64 | * Use in conjunction with -R</pre> |
---|
65 | * |
---|
66 | * <pre> -N <num to select> |
---|
67 | * Specify number of attributes to select</pre> |
---|
68 | * |
---|
69 | <!-- options-end --> |
---|
70 | * |
---|
71 | * @author Mark Hall |
---|
72 | * @version $Revision: 1.10 $ |
---|
73 | */ |
---|
74 | public class GreedyStepwise |
---|
75 | extends ASSearch |
---|
76 | implements RankedOutputSearch, StartSetHandler, OptionHandler { |
---|
77 | |
---|
78 | /** for serialization */ |
---|
79 | static final long serialVersionUID = -6312951970168325471L; |
---|
80 | |
---|
81 | /** does the data have a class */ |
---|
82 | protected boolean m_hasClass; |
---|
83 | |
---|
84 | /** holds the class index */ |
---|
85 | protected int m_classIndex; |
---|
86 | |
---|
87 | /** number of attributes in the data */ |
---|
88 | protected int m_numAttribs; |
---|
89 | |
---|
90 | /** true if the user has requested a ranked list of attributes */ |
---|
91 | protected boolean m_rankingRequested; |
---|
92 | |
---|
93 | /** |
---|
94 | * go from one side of the search space to the other in order to generate |
---|
95 | * a ranking |
---|
96 | */ |
---|
97 | protected boolean m_doRank; |
---|
98 | |
---|
99 | /** used to indicate whether or not ranking has been performed */ |
---|
100 | protected boolean m_doneRanking; |
---|
101 | |
---|
102 | /** |
---|
103 | * A threshold by which to discard attributes---used by the |
---|
104 | * AttributeSelection module |
---|
105 | */ |
---|
106 | protected double m_threshold; |
---|
107 | |
---|
108 | /** The number of attributes to select. -1 indicates that all attributes |
---|
109 | are to be retained. Has precedence over m_threshold */ |
---|
110 | protected int m_numToSelect = -1; |
---|
111 | |
---|
112 | protected int m_calculatedNumToSelect; |
---|
113 | |
---|
114 | /** the merit of the best subset found */ |
---|
115 | protected double m_bestMerit; |
---|
116 | |
---|
117 | /** a ranked list of attribute indexes */ |
---|
118 | protected double [][] m_rankedAtts; |
---|
119 | protected int m_rankedSoFar; |
---|
120 | |
---|
121 | /** the best subset found */ |
---|
122 | protected BitSet m_best_group; |
---|
123 | protected ASEvaluation m_ASEval; |
---|
124 | |
---|
125 | protected Instances m_Instances; |
---|
126 | |
---|
127 | /** holds the start set for the search as a Range */ |
---|
128 | protected Range m_startRange; |
---|
129 | |
---|
130 | /** holds an array of starting attributes */ |
---|
131 | protected int [] m_starting; |
---|
132 | |
---|
133 | /** Use a backwards search instead of a forwards one */ |
---|
134 | protected boolean m_backward = false; |
---|
135 | |
---|
136 | /** If set then attributes will continue to be added during a forward |
---|
137 | search as long as the merit does not degrade */ |
---|
138 | protected boolean m_conservativeSelection = false; |
---|
139 | |
---|
/**
 * Creates a search with an empty start set, no cached start selection,
 * the threshold at its "discard nothing" value (-Double.MAX_VALUE) and
 * all per-run state cleared.
 */
public GreedyStepwise () {
  m_startRange = new Range();
  m_starting = null;
  m_doneRanking = false;
  m_threshold = -Double.MAX_VALUE;
  resetOptions();
}
---|
150 | |
---|
151 | /** |
---|
152 | * Returns a string describing this search method |
---|
153 | * @return a description of the search suitable for |
---|
154 | * displaying in the explorer/experimenter gui |
---|
155 | */ |
---|
156 | public String globalInfo() { |
---|
157 | return "GreedyStepwise :\n\nPerforms a greedy forward or backward search " |
---|
158 | +"through " |
---|
159 | +"the space of attribute subsets. May start with no/all attributes or from " |
---|
160 | +"an arbitrary point in the space. Stops when the addition/deletion of any " |
---|
161 | +"remaining attributes results in a decrease in evaluation. " |
---|
162 | +"Can also produce a ranked list of " |
---|
163 | +"attributes by traversing the space from one side to the other and " |
---|
164 | +"recording the order that attributes are selected.\n"; |
---|
165 | } |
---|
166 | |
---|
167 | /** |
---|
168 | * Returns the tip text for this property |
---|
169 | * @return tip text for this property suitable for |
---|
170 | * displaying in the explorer/experimenter gui |
---|
171 | */ |
---|
172 | public String searchBackwardsTipText() { |
---|
173 | return "Search backwards rather than forwards."; |
---|
174 | } |
---|
175 | |
---|
176 | /** |
---|
177 | * Set whether to search backwards instead of forwards |
---|
178 | * |
---|
179 | * @param back true to search backwards |
---|
180 | */ |
---|
181 | public void setSearchBackwards(boolean back) { |
---|
182 | m_backward = back; |
---|
183 | if (m_backward) { |
---|
184 | setGenerateRanking(false); |
---|
185 | } |
---|
186 | } |
---|
187 | |
---|
188 | /** |
---|
189 | * Get whether to search backwards |
---|
190 | * |
---|
191 | * @return true if the search will proceed backwards |
---|
192 | */ |
---|
193 | public boolean getSearchBackwards() { |
---|
194 | return m_backward; |
---|
195 | } |
---|
196 | |
---|
197 | /** |
---|
198 | * Returns the tip text for this property |
---|
199 | * @return tip text for this property suitable for |
---|
200 | * displaying in the explorer/experimenter gui |
---|
201 | */ |
---|
202 | public String thresholdTipText() { |
---|
203 | return "Set threshold by which attributes can be discarded. Default value " |
---|
204 | + "results in no attributes being discarded. Use in conjunction with " |
---|
205 | + "generateRanking"; |
---|
206 | } |
---|
207 | |
---|
208 | /** |
---|
209 | * Set the threshold by which the AttributeSelection module can discard |
---|
210 | * attributes. |
---|
211 | * @param threshold the threshold. |
---|
212 | */ |
---|
213 | public void setThreshold(double threshold) { |
---|
214 | m_threshold = threshold; |
---|
215 | } |
---|
216 | |
---|
217 | /** |
---|
218 | * Returns the threshold so that the AttributeSelection module can |
---|
219 | * discard attributes from the ranking. |
---|
220 | */ |
---|
221 | public double getThreshold() { |
---|
222 | return m_threshold; |
---|
223 | } |
---|
224 | |
---|
225 | /** |
---|
226 | * Returns the tip text for this property |
---|
227 | * @return tip text for this property suitable for |
---|
228 | * displaying in the explorer/experimenter gui |
---|
229 | */ |
---|
230 | public String numToSelectTipText() { |
---|
231 | return "Specify the number of attributes to retain. The default value " |
---|
232 | +"(-1) indicates that all attributes are to be retained. Use either " |
---|
233 | +"this option or a threshold to reduce the attribute set."; |
---|
234 | } |
---|
235 | |
---|
236 | /** |
---|
237 | * Specify the number of attributes to select from the ranked list |
---|
238 | * (if generating a ranking). -1 |
---|
239 | * indicates that all attributes are to be retained. |
---|
240 | * @param n the number of attributes to retain |
---|
241 | */ |
---|
242 | public void setNumToSelect(int n) { |
---|
243 | m_numToSelect = n; |
---|
244 | } |
---|
245 | |
---|
246 | /** |
---|
247 | * Gets the number of attributes to be retained. |
---|
248 | * @return the number of attributes to retain |
---|
249 | */ |
---|
250 | public int getNumToSelect() { |
---|
251 | return m_numToSelect; |
---|
252 | } |
---|
253 | |
---|
254 | /** |
---|
255 | * Gets the calculated number of attributes to retain. This is the |
---|
256 | * actual number of attributes to retain. This is the same as |
---|
257 | * getNumToSelect if the user specifies a number which is not less |
---|
258 | * than zero. Otherwise it should be the number of attributes in the |
---|
259 | * (potentially transformed) data. |
---|
260 | */ |
---|
261 | public int getCalculatedNumToSelect() { |
---|
262 | if (m_numToSelect >= 0) { |
---|
263 | m_calculatedNumToSelect = m_numToSelect; |
---|
264 | } |
---|
265 | return m_calculatedNumToSelect; |
---|
266 | } |
---|
267 | |
---|
268 | /** |
---|
269 | * Returns the tip text for this property |
---|
270 | * @return tip text for this property suitable for |
---|
271 | * displaying in the explorer/experimenter gui |
---|
272 | */ |
---|
273 | public String generateRankingTipText() { |
---|
274 | return "Set to true if a ranked list is required."; |
---|
275 | } |
---|
276 | |
---|
277 | /** |
---|
278 | * Records whether the user has requested a ranked list of attributes. |
---|
279 | * @param doRank true if ranking is requested |
---|
280 | */ |
---|
281 | public void setGenerateRanking(boolean doRank) { |
---|
282 | m_rankingRequested = doRank; |
---|
283 | } |
---|
284 | |
---|
285 | /** |
---|
286 | * Gets whether ranking has been requested. This is used by the |
---|
287 | * AttributeSelection module to determine if rankedAttributes() |
---|
288 | * should be called. |
---|
289 | * @return true if ranking has been requested. |
---|
290 | */ |
---|
291 | public boolean getGenerateRanking() { |
---|
292 | return m_rankingRequested; |
---|
293 | } |
---|
294 | |
---|
295 | /** |
---|
296 | * Returns the tip text for this property |
---|
297 | * @return tip text for this property suitable for |
---|
298 | * displaying in the explorer/experimenter gui |
---|
299 | */ |
---|
300 | public String startSetTipText() { |
---|
301 | return "Set the start point for the search. This is specified as a comma " |
---|
302 | +"seperated list off attribute indexes starting at 1. It can include " |
---|
303 | +"ranges. Eg. 1,2,5-9,17."; |
---|
304 | } |
---|
305 | |
---|
306 | /** |
---|
307 | * Sets a starting set of attributes for the search. It is the |
---|
308 | * search method's responsibility to report this start set (if any) |
---|
309 | * in its toString() method. |
---|
310 | * @param startSet a string containing a list of attributes (and or ranges), |
---|
311 | * eg. 1,2,6,10-15. |
---|
312 | * @throws Exception if start set can't be set. |
---|
313 | */ |
---|
314 | public void setStartSet (String startSet) throws Exception { |
---|
315 | m_startRange.setRanges(startSet); |
---|
316 | } |
---|
317 | |
---|
318 | /** |
---|
319 | * Returns a list of attributes (and or attribute ranges) as a String |
---|
320 | * @return a list of attributes (and or attribute ranges) |
---|
321 | */ |
---|
322 | public String getStartSet () { |
---|
323 | return m_startRange.getRanges(); |
---|
324 | } |
---|
325 | |
---|
326 | /** |
---|
327 | * Returns the tip text for this property |
---|
328 | * @return tip text for this property suitable for |
---|
329 | * displaying in the explorer/experimenter gui |
---|
330 | */ |
---|
331 | public String conservativeForwardSelectionTipText() { |
---|
332 | return "If true (and forward search is selected) then attributes " |
---|
333 | +"will continue to be added to the best subset as long as merit does " |
---|
334 | +"not degrade."; |
---|
335 | } |
---|
336 | |
---|
337 | /** |
---|
338 | * Set whether attributes should continue to be added during |
---|
339 | * a forward search as long as merit does not decrease |
---|
340 | * @param c true if atts should continue to be atted |
---|
341 | */ |
---|
342 | public void setConservativeForwardSelection(boolean c) { |
---|
343 | m_conservativeSelection = c; |
---|
344 | } |
---|
345 | |
---|
346 | /** |
---|
347 | * Gets whether conservative selection has been enabled |
---|
348 | * @return true if conservative forward selection is enabled |
---|
349 | */ |
---|
350 | public boolean getConservativeForwardSelection() { |
---|
351 | return m_conservativeSelection; |
---|
352 | } |
---|
353 | |
---|
354 | /** |
---|
355 | * Returns an enumeration describing the available options. |
---|
356 | * @return an enumeration of all the available options. |
---|
357 | **/ |
---|
358 | public Enumeration listOptions () { |
---|
359 | Vector newVector = new Vector(5); |
---|
360 | |
---|
361 | newVector.addElement(new Option("\tUse conservative forward search" |
---|
362 | ,"-C", 0, "-C")); |
---|
363 | |
---|
364 | newVector.addElement(new Option("\tUse a backward search instead of a" |
---|
365 | +"\n\tforward one." |
---|
366 | ,"-B", 0, "-B")); |
---|
367 | newVector |
---|
368 | .addElement(new Option("\tSpecify a starting set of attributes." |
---|
369 | + "\n\tEg. 1,3,5-7." |
---|
370 | ,"P",1 |
---|
371 | , "-P <start set>")); |
---|
372 | |
---|
373 | newVector.addElement(new Option("\tProduce a ranked list of attributes." |
---|
374 | ,"R",0,"-R")); |
---|
375 | newVector |
---|
376 | .addElement(new Option("\tSpecify a theshold by which attributes" |
---|
377 | + "\n\tmay be discarded from the ranking." |
---|
378 | +"\n\tUse in conjuction with -R","T",1 |
---|
379 | , "-T <threshold>")); |
---|
380 | |
---|
381 | newVector |
---|
382 | .addElement(new Option("\tSpecify number of attributes to select" |
---|
383 | ,"N",1 |
---|
384 | , "-N <num to select>")); |
---|
385 | |
---|
386 | return newVector.elements(); |
---|
387 | |
---|
388 | } |
---|
389 | |
---|
390 | /** |
---|
391 | * Parses a given list of options. <p/> |
---|
392 | * |
---|
393 | <!-- options-start --> |
---|
394 | * Valid options are: <p/> |
---|
395 | * |
---|
396 | * <pre> -C |
---|
397 | * Use conservative forward search</pre> |
---|
398 | * |
---|
399 | * <pre> -B |
---|
400 | * Use a backward search instead of a |
---|
401 | * forward one.</pre> |
---|
402 | * |
---|
403 | * <pre> -P <start set> |
---|
404 | * Specify a starting set of attributes. |
---|
405 | * Eg. 1,3,5-7.</pre> |
---|
406 | * |
---|
407 | * <pre> -R |
---|
408 | * Produce a ranked list of attributes.</pre> |
---|
409 | * |
---|
410 | * <pre> -T <threshold> |
---|
411 | * Specify a theshold by which attributes |
---|
412 | * may be discarded from the ranking. |
---|
413 | * Use in conjuction with -R</pre> |
---|
414 | * |
---|
415 | * <pre> -N <num to select> |
---|
416 | * Specify number of attributes to select</pre> |
---|
417 | * |
---|
418 | <!-- options-end --> |
---|
419 | * |
---|
420 | * @param options the list of options as an array of strings |
---|
421 | * @throws Exception if an option is not supported |
---|
422 | */ |
---|
423 | public void setOptions (String[] options) |
---|
424 | throws Exception { |
---|
425 | String optionString; |
---|
426 | resetOptions(); |
---|
427 | |
---|
428 | setSearchBackwards(Utils.getFlag('B', options)); |
---|
429 | |
---|
430 | setConservativeForwardSelection(Utils.getFlag('C', options)); |
---|
431 | |
---|
432 | optionString = Utils.getOption('P', options); |
---|
433 | if (optionString.length() != 0) { |
---|
434 | setStartSet(optionString); |
---|
435 | } |
---|
436 | |
---|
437 | setGenerateRanking(Utils.getFlag('R', options)); |
---|
438 | |
---|
439 | optionString = Utils.getOption('T', options); |
---|
440 | if (optionString.length() != 0) { |
---|
441 | Double temp; |
---|
442 | temp = Double.valueOf(optionString); |
---|
443 | setThreshold(temp.doubleValue()); |
---|
444 | } |
---|
445 | |
---|
446 | optionString = Utils.getOption('N', options); |
---|
447 | if (optionString.length() != 0) { |
---|
448 | setNumToSelect(Integer.parseInt(optionString)); |
---|
449 | } |
---|
450 | } |
---|
451 | |
---|
452 | /** |
---|
453 | * Gets the current settings of ReliefFAttributeEval. |
---|
454 | * |
---|
455 | * @return an array of strings suitable for passing to setOptions() |
---|
456 | */ |
---|
457 | public String[] getOptions () { |
---|
458 | String[] options = new String[9]; |
---|
459 | int current = 0; |
---|
460 | |
---|
461 | if (getSearchBackwards()) { |
---|
462 | options[current++] = "-B"; |
---|
463 | } |
---|
464 | |
---|
465 | if (getConservativeForwardSelection()) { |
---|
466 | options[current++] = "-C"; |
---|
467 | } |
---|
468 | |
---|
469 | if (!(getStartSet().equals(""))) { |
---|
470 | options[current++] = "-P"; |
---|
471 | options[current++] = ""+startSetToString(); |
---|
472 | } |
---|
473 | |
---|
474 | if (getGenerateRanking()) { |
---|
475 | options[current++] = "-R"; |
---|
476 | } |
---|
477 | options[current++] = "-T"; |
---|
478 | options[current++] = "" + getThreshold(); |
---|
479 | |
---|
480 | options[current++] = "-N"; |
---|
481 | options[current++] = ""+getNumToSelect(); |
---|
482 | |
---|
483 | while (current < options.length) { |
---|
484 | options[current++] = ""; |
---|
485 | } |
---|
486 | return options; |
---|
487 | } |
---|
488 | |
---|
489 | /** |
---|
490 | * converts the array of starting attributes to a string. This is |
---|
491 | * used by getOptions to return the actual attributes specified |
---|
492 | * as the starting set. This is better than using m_startRanges.getRanges() |
---|
493 | * as the same start set can be specified in different ways from the |
---|
494 | * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that |
---|
495 | * is stored in a database is comparable. |
---|
496 | * @return a comma seperated list of individual attribute numbers as a String |
---|
497 | */ |
---|
498 | protected String startSetToString() { |
---|
499 | StringBuffer FString = new StringBuffer(); |
---|
500 | boolean didPrint; |
---|
501 | |
---|
502 | if (m_starting == null) { |
---|
503 | return getStartSet(); |
---|
504 | } |
---|
505 | for (int i = 0; i < m_starting.length; i++) { |
---|
506 | didPrint = false; |
---|
507 | |
---|
508 | if ((m_hasClass == false) || |
---|
509 | (m_hasClass == true && i != m_classIndex)) { |
---|
510 | FString.append((m_starting[i] + 1)); |
---|
511 | didPrint = true; |
---|
512 | } |
---|
513 | |
---|
514 | if (i == (m_starting.length - 1)) { |
---|
515 | FString.append(""); |
---|
516 | } |
---|
517 | else { |
---|
518 | if (didPrint) { |
---|
519 | FString.append(","); |
---|
520 | } |
---|
521 | } |
---|
522 | } |
---|
523 | |
---|
524 | return FString.toString(); |
---|
525 | } |
---|
526 | |
---|
527 | /** |
---|
528 | * returns a description of the search. |
---|
529 | * @return a description of the search as a String. |
---|
530 | */ |
---|
531 | public String toString() { |
---|
532 | StringBuffer FString = new StringBuffer(); |
---|
533 | FString.append("\tGreedy Stepwise (" |
---|
534 | + ((m_backward) |
---|
535 | ? "backwards)" |
---|
536 | : "forwards)")+".\n\tStart set: "); |
---|
537 | |
---|
538 | if (m_starting == null) { |
---|
539 | if (m_backward) { |
---|
540 | FString.append("all attributes\n"); |
---|
541 | } else { |
---|
542 | FString.append("no attributes\n"); |
---|
543 | } |
---|
544 | } |
---|
545 | else { |
---|
546 | FString.append(startSetToString()+"\n"); |
---|
547 | } |
---|
548 | if (!m_doneRanking) { |
---|
549 | FString.append("\tMerit of best subset found: " |
---|
550 | +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n"); |
---|
551 | } |
---|
552 | |
---|
553 | if ((m_threshold != -Double.MAX_VALUE) && (m_doneRanking)) { |
---|
554 | FString.append("\tThreshold for discarding attributes: " |
---|
555 | + Utils.doubleToString(m_threshold,8,4)+"\n"); |
---|
556 | } |
---|
557 | |
---|
558 | return FString.toString(); |
---|
559 | } |
---|
560 | |
---|
561 | |
---|
562 | /** |
---|
563 | * Searches the attribute subset space by forward selection. |
---|
564 | * |
---|
565 | * @param ASEval the attribute evaluator to guide the search |
---|
566 | * @param data the training instances. |
---|
567 | * @return an array (not necessarily ordered) of selected attribute indexes |
---|
568 | * @throws Exception if the search can't be completed |
---|
569 | */ |
---|
570 | public int[] search (ASEvaluation ASEval, Instances data) |
---|
571 | throws Exception { |
---|
572 | |
---|
573 | int i; |
---|
574 | double best_merit = -Double.MAX_VALUE; |
---|
575 | double temp_best,temp_merit; |
---|
576 | int temp_index=0; |
---|
577 | BitSet temp_group; |
---|
578 | |
---|
579 | if (data != null) { // this is a fresh run so reset |
---|
580 | resetOptions(); |
---|
581 | m_Instances = data; |
---|
582 | } |
---|
583 | m_ASEval = ASEval; |
---|
584 | |
---|
585 | m_numAttribs = m_Instances.numAttributes(); |
---|
586 | |
---|
587 | if (m_best_group == null) { |
---|
588 | m_best_group = new BitSet(m_numAttribs); |
---|
589 | } |
---|
590 | |
---|
591 | if (!(m_ASEval instanceof SubsetEvaluator)) { |
---|
592 | throw new Exception(m_ASEval.getClass().getName() |
---|
593 | + " is not a " |
---|
594 | + "Subset evaluator!"); |
---|
595 | } |
---|
596 | |
---|
597 | m_startRange.setUpper(m_numAttribs-1); |
---|
598 | if (!(getStartSet().equals(""))) { |
---|
599 | m_starting = m_startRange.getSelection(); |
---|
600 | } |
---|
601 | |
---|
602 | if (m_ASEval instanceof UnsupervisedSubsetEvaluator) { |
---|
603 | m_hasClass = false; |
---|
604 | m_classIndex = -1; |
---|
605 | } |
---|
606 | else { |
---|
607 | m_hasClass = true; |
---|
608 | m_classIndex = m_Instances.classIndex(); |
---|
609 | } |
---|
610 | |
---|
611 | SubsetEvaluator ASEvaluator = (SubsetEvaluator)m_ASEval; |
---|
612 | |
---|
613 | if (m_rankedAtts == null) { |
---|
614 | m_rankedAtts = new double[m_numAttribs][2]; |
---|
615 | m_rankedSoFar = 0; |
---|
616 | } |
---|
617 | |
---|
618 | // If a starting subset has been supplied, then initialise the bitset |
---|
619 | if (m_starting != null && m_rankedSoFar <= 0) { |
---|
620 | for (i = 0; i < m_starting.length; i++) { |
---|
621 | if ((m_starting[i]) != m_classIndex) { |
---|
622 | m_best_group.set(m_starting[i]); |
---|
623 | } |
---|
624 | } |
---|
625 | } else { |
---|
626 | if (m_backward && m_rankedSoFar <= 0) { |
---|
627 | for (i = 0; i < m_numAttribs; i++) { |
---|
628 | if (i != m_classIndex) { |
---|
629 | m_best_group.set(i); |
---|
630 | } |
---|
631 | } |
---|
632 | } |
---|
633 | } |
---|
634 | |
---|
635 | // Evaluate the initial subset |
---|
636 | best_merit = ASEvaluator.evaluateSubset(m_best_group); |
---|
637 | |
---|
638 | // main search loop |
---|
639 | boolean done = false; |
---|
640 | boolean addone = false; |
---|
641 | boolean z; |
---|
642 | while (!done) { |
---|
643 | temp_group = (BitSet)m_best_group.clone(); |
---|
644 | temp_best = best_merit; |
---|
645 | if (m_doRank) { |
---|
646 | temp_best = -Double.MAX_VALUE; |
---|
647 | } |
---|
648 | done = true; |
---|
649 | addone = false; |
---|
650 | for (i=0;i<m_numAttribs;i++) { |
---|
651 | if (m_backward) { |
---|
652 | z = ((i != m_classIndex) && (temp_group.get(i))); |
---|
653 | } else { |
---|
654 | z = ((i != m_classIndex) && (!temp_group.get(i))); |
---|
655 | } |
---|
656 | if (z) { |
---|
657 | // set/unset the bit |
---|
658 | if (m_backward) { |
---|
659 | temp_group.clear(i); |
---|
660 | } else { |
---|
661 | temp_group.set(i); |
---|
662 | } |
---|
663 | temp_merit = ASEvaluator.evaluateSubset(temp_group); |
---|
664 | if (m_backward) { |
---|
665 | z = (temp_merit >= temp_best); |
---|
666 | } else { |
---|
667 | if (m_conservativeSelection) { |
---|
668 | z = (temp_merit >= temp_best); |
---|
669 | } else { |
---|
670 | z = (temp_merit > temp_best); |
---|
671 | } |
---|
672 | } |
---|
673 | |
---|
674 | if (z) { |
---|
675 | temp_best = temp_merit; |
---|
676 | temp_index = i; |
---|
677 | addone = true; |
---|
678 | done = false; |
---|
679 | } |
---|
680 | |
---|
681 | // unset this addition/deletion |
---|
682 | if (m_backward) { |
---|
683 | temp_group.set(i); |
---|
684 | } else { |
---|
685 | temp_group.clear(i); |
---|
686 | } |
---|
687 | if (m_doRank) { |
---|
688 | done = false; |
---|
689 | } |
---|
690 | } |
---|
691 | } |
---|
692 | if (addone) { |
---|
693 | if (m_backward) { |
---|
694 | m_best_group.clear(temp_index); |
---|
695 | } else { |
---|
696 | m_best_group.set(temp_index); |
---|
697 | } |
---|
698 | best_merit = temp_best; |
---|
699 | m_rankedAtts[m_rankedSoFar][0] = temp_index; |
---|
700 | m_rankedAtts[m_rankedSoFar][1] = best_merit; |
---|
701 | m_rankedSoFar++; |
---|
702 | } |
---|
703 | } |
---|
704 | m_bestMerit = best_merit; |
---|
705 | return attributeList(m_best_group); |
---|
706 | } |
---|
707 | |
---|
708 | /** |
---|
709 | * Produces a ranked list of attributes. Search must have been performed |
---|
710 | * prior to calling this function. Search is called by this function to |
---|
711 | * complete the traversal of the the search space. A list of |
---|
712 | * attributes and merits are returned. The attributes a ranked by the |
---|
713 | * order they are added to the subset during a forward selection search. |
---|
714 | * Individual merit values reflect the merit associated with adding the |
---|
715 | * corresponding attribute to the subset; because of this, merit values |
---|
716 | * may initially increase but then decrease as the best subset is |
---|
717 | * "passed by" on the way to the far side of the search space. |
---|
718 | * |
---|
719 | * @return an array of attribute indexes and associated merit values |
---|
720 | * @throws Exception if something goes wrong. |
---|
721 | */ |
---|
722 | public double [][] rankedAttributes() throws Exception { |
---|
723 | |
---|
724 | if (m_rankedAtts == null || m_rankedSoFar == -1) { |
---|
725 | throw new Exception("Search must be performed before attributes " |
---|
726 | +"can be ranked."); |
---|
727 | } |
---|
728 | |
---|
729 | m_doRank = true; |
---|
730 | search (m_ASEval, null); |
---|
731 | |
---|
732 | double [][] final_rank = new double [m_rankedSoFar][2]; |
---|
733 | for (int i=0;i<m_rankedSoFar;i++) { |
---|
734 | final_rank[i][0] = m_rankedAtts[i][0]; |
---|
735 | final_rank[i][1] = m_rankedAtts[i][1]; |
---|
736 | } |
---|
737 | |
---|
738 | resetOptions(); |
---|
739 | m_doneRanking = true; |
---|
740 | |
---|
741 | if (m_numToSelect > final_rank.length) { |
---|
742 | throw new Exception("More attributes requested than exist in the data"); |
---|
743 | } |
---|
744 | |
---|
745 | if (m_numToSelect <= 0) { |
---|
746 | if (m_threshold == -Double.MAX_VALUE) { |
---|
747 | m_calculatedNumToSelect = final_rank.length; |
---|
748 | } else { |
---|
749 | determineNumToSelectFromThreshold(final_rank); |
---|
750 | } |
---|
751 | } |
---|
752 | |
---|
753 | return final_rank; |
---|
754 | } |
---|
755 | |
---|
756 | private void determineNumToSelectFromThreshold(double [][] ranking) { |
---|
757 | int count = 0; |
---|
758 | for (int i = 0; i < ranking.length; i++) { |
---|
759 | if (ranking[i][1] > m_threshold) { |
---|
760 | count++; |
---|
761 | } |
---|
762 | } |
---|
763 | m_calculatedNumToSelect = count; |
---|
764 | } |
---|
765 | |
---|
766 | /** |
---|
767 | * converts a BitSet into a list of attribute indexes |
---|
768 | * @param group the BitSet to convert |
---|
769 | * @return an array of attribute indexes |
---|
770 | **/ |
---|
771 | protected int[] attributeList (BitSet group) { |
---|
772 | int count = 0; |
---|
773 | |
---|
774 | // count how many were selected |
---|
775 | for (int i = 0; i < m_numAttribs; i++) { |
---|
776 | if (group.get(i)) { |
---|
777 | count++; |
---|
778 | } |
---|
779 | } |
---|
780 | |
---|
781 | int[] list = new int[count]; |
---|
782 | count = 0; |
---|
783 | |
---|
784 | for (int i = 0; i < m_numAttribs; i++) { |
---|
785 | if (group.get(i)) { |
---|
786 | list[count++] = i; |
---|
787 | } |
---|
788 | } |
---|
789 | |
---|
790 | return list; |
---|
791 | } |
---|
792 | |
---|
793 | /** |
---|
794 | * Resets options |
---|
795 | */ |
---|
796 | protected void resetOptions() { |
---|
797 | m_doRank = false; |
---|
798 | m_best_group = null; |
---|
799 | m_ASEval = null; |
---|
800 | m_Instances = null; |
---|
801 | m_rankedSoFar = -1; |
---|
802 | m_rankedAtts = null; |
---|
803 | } |
---|
804 | |
---|
805 | /** |
---|
806 | * Returns the revision string. |
---|
807 | * |
---|
808 | * @return the revision |
---|
809 | */ |
---|
810 | public String getRevision() { |
---|
811 | return RevisionUtils.extract("$Revision: 1.10 $"); |
---|
812 | } |
---|
813 | } |
---|