1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * ScatterSearchV1.java |
---|
19 | * Copyright (C) 2008 Adrian Pino |
---|
20 | * Copyright (C) 2008 University of Waikato, Hamilton, NZ |
---|
21 | * |
---|
22 | */ |
---|
23 | |
---|
24 | package weka.attributeSelection; |
---|
25 | |
---|
26 | |
---|
27 | import java.io.Serializable; |
---|
28 | import java.util.ArrayList; |
---|
29 | import java.util.BitSet; |
---|
30 | import java.util.Enumeration; |
---|
31 | import java.util.List; |
---|
32 | import java.util.Random; |
---|
33 | import java.util.Vector; |
---|
34 | |
---|
35 | import weka.core.Instances; |
---|
36 | import weka.core.Option; |
---|
37 | import weka.core.OptionHandler; |
---|
38 | import weka.core.RevisionUtils; |
---|
39 | import weka.core.SelectedTag; |
---|
40 | import weka.core.Tag; |
---|
41 | import weka.core.TechnicalInformation; |
---|
42 | import weka.core.TechnicalInformationHandler; |
---|
43 | import weka.core.Utils; |
---|
44 | import weka.core.TechnicalInformation.Field; |
---|
45 | import weka.core.TechnicalInformation.Type; |
---|
46 | |
---|
47 | |
---|
48 | /** |
---|
49 | * Class for performing the Sequential Scatter Search. <p> |
---|
50 | * |
---|
51 | <!-- globalinfo-start --> |
---|
52 | * Scatter Search :<br/> |
---|
53 | * <br/> |
---|
54 | * Performs an Scatter Search through the space of attribute subsets. Start with a population of many significants and diverses subset stops when the result is higher than a given treshold or there's not more improvement<br/> |
---|
55 | * For more information see:<br/> |
---|
56 | * <br/> |
---|
57 | * Felix Garcia Lopez (2004). Solving feature subset selection problem by a Parallel Scatter Search. Elsevier. |
---|
58 | * <p/> |
---|
59 | <!-- globalinfo-end --> |
---|
60 | * |
---|
61 | <!-- options-start --> |
---|
62 | * Valid options are: <p/> |
---|
63 | * |
---|
64 | * <pre> -Z <num> |
---|
65 | * Specify the number of subsets to generate |
---|
66 | * in the initial population..</pre> |
---|
67 | * |
---|
68 | * <pre> -T <threshold> |
---|
69 | * Specify the treshold used for considering when a subset is significant.</pre> |
---|
70 | * |
---|
71 | * <pre> -R <0 = greedy combination | 1 = reduced greedy combination > |
---|
72 | * Specify the kind of combiantion |
---|
73 | * for using it in the combination method.</pre> |
---|
74 | * |
---|
75 | * <pre> -S <seed> |
---|
76 | * Set the random number seed. |
---|
77 | * (default = 1)</pre> |
---|
78 | * |
---|
79 | * <pre> -D |
---|
80 | * Verbose output for monitoring the search.</pre> |
---|
81 | * |
---|
82 | <!-- options-end --> |
---|
83 | * |
---|
84 | <!-- technical-bibtex-start --> |
---|
85 | * BibTeX: |
---|
86 | * <pre> |
---|
87 | * @book{Lopez2004, |
---|
88 | * author = {Felix Garcia Lopez}, |
---|
89 | * month = {October}, |
---|
90 | * publisher = {Elsevier}, |
---|
91 | * title = {Solving feature subset selection problem by a Parallel Scatter Search}, |
---|
92 | * year = {2004}, |
---|
93 | * language = {English} |
---|
94 | * } |
---|
95 | * </pre> |
---|
96 | * <p/> |
---|
97 | <!-- technical-bibtex-end --> |
---|
98 | * |
---|
99 | * from the Book: Solving feature subset selection problem by a Parallel Scatter Search, Felix Garcia Lopez. |
---|
100 | * @author Adrian Pino (apinoa@facinf.uho.edu.cu) |
---|
101 | * @version $Revision: 4977 $ |
---|
102 | * |
---|
103 | */ |
---|
104 | |
---|
105 | public class ScatterSearchV1 extends ASSearch |
---|
106 | implements OptionHandler, TechnicalInformationHandler { |
---|
107 | |
---|
108 | /** for serialization */ |
---|
109 | static final long serialVersionUID = -8512041420388121326L; |
---|
110 | |
---|
111 | /** number of attributes in the data */ |
---|
112 | private int m_numAttribs; |
---|
113 | |
---|
114 | /** holds the class index */ |
---|
115 | private int m_classIndex; |
---|
116 | |
---|
117 | /** holds the treshhold that delimits the good attributes */ |
---|
118 | private double m_treshold; |
---|
119 | |
---|
120 | /** the initial threshold */ |
---|
121 | private double m_initialThreshold; |
---|
122 | |
---|
123 | /** the kind of comination betwen parents ((0)greedy combination/(1)reduced greedy combination)*/ |
---|
124 | int m_typeOfCombination; |
---|
125 | |
---|
126 | /** random number generation */ |
---|
127 | private Random m_random; |
---|
128 | |
---|
129 | /** seed for random number generation */ |
---|
130 | private int m_seed; |
---|
131 | |
---|
132 | /** verbose output for monitoring the search and debugging */ |
---|
133 | private boolean m_debug = false; |
---|
134 | |
---|
135 | /** holds a report of the search */ |
---|
136 | private StringBuffer m_InformationReports; |
---|
137 | |
---|
138 | /** total number of subsets evaluated during a search */ |
---|
139 | private int m_totalEvals; |
---|
140 | |
---|
141 | /** holds the merit of the best subset found */ |
---|
142 | protected double m_bestMerit; |
---|
143 | |
---|
144 | /** time for procesing the search method */ |
---|
145 | private long m_processinTime; |
---|
146 | |
---|
147 | /** holds the Initial Population of Subsets*/ |
---|
148 | private List<Subset> m_population; |
---|
149 | |
---|
150 | /** holds the population size*/ |
---|
151 | private int m_popSize; |
---|
152 | |
---|
153 | /** holds the user selected initial population size */ |
---|
154 | private int m_initialPopSize; |
---|
155 | |
---|
156 | /** if no initial user pop size, then this holds the initial |
---|
157 | * pop size calculated from the number of attributes in the data |
---|
158 | * (for use in the toString() method) |
---|
159 | */ |
---|
160 | private int m_calculatedInitialPopSize; |
---|
161 | |
---|
162 | /** holds the subsets most significants and diverses |
---|
163 | * of the population (ReferenceSet). |
---|
164 | * |
---|
165 | * (transient because the subList() method returns |
---|
166 | * a non serializable Object). |
---|
167 | */ |
---|
168 | private transient List<Subset> m_ReferenceSet; |
---|
169 | |
---|
170 | /** holds the greedy combination(reduced or not) of all the subsets of the ReferenceSet*/ |
---|
171 | private transient List<Subset> m_parentsCombination; |
---|
172 | |
---|
173 | /**holds the attributes ranked*/ |
---|
174 | private List<Subset> m_attributeRanking; |
---|
175 | |
---|
176 | /**Evaluator used to know the significance of a subset (for guiding the search)*/ |
---|
177 | private SubsetEvaluator ASEvaluator =null; |
---|
178 | |
---|
179 | |
---|
180 | /** kind of combination */ |
---|
181 | protected static final int COMBINATION_NOT_REDUCED = 0; |
---|
182 | protected static final int COMBINATION_REDUCED = 1; ; |
---|
183 | public static final Tag [] TAGS_SELECTION = { |
---|
184 | new Tag(COMBINATION_NOT_REDUCED, "Greedy Combination"), |
---|
185 | new Tag(COMBINATION_REDUCED, "Reduced Greedy Combination") |
---|
186 | }; |
---|
187 | |
---|
188 | /** |
---|
189 | * Returns a string describing this search method |
---|
190 | * @return a description of the search suitable for |
---|
191 | * displaying in the explorer/experimenter gui |
---|
192 | */ |
---|
193 | public String globalInfo() { |
---|
194 | return "Scatter Search :\n\nPerforms an Scatter Search " |
---|
195 | +"through " |
---|
196 | +"the space of attribute subsets. Start with a population of many significants and diverses subset " |
---|
197 | +" stops when the result is higher than a given treshold or there's not more improvement\n" |
---|
198 | + "For more information see:\n\n" |
---|
199 | + getTechnicalInformation().toString(); |
---|
200 | } |
---|
201 | |
---|
202 | /** |
---|
203 | * Returns an instance of a TechnicalInformation object, containing |
---|
204 | * detailed information about the technical background of this class, |
---|
205 | * e.g., paper reference or book this class is based on. |
---|
206 | * |
---|
207 | * @return the technical information about this class |
---|
208 | */ |
---|
209 | public TechnicalInformation getTechnicalInformation() { |
---|
210 | TechnicalInformation result; |
---|
211 | |
---|
212 | result = new TechnicalInformation(Type.BOOK); |
---|
213 | result.setValue(Field.AUTHOR, "Felix Garcia Lopez"); |
---|
214 | result.setValue(Field.MONTH, "October"); |
---|
215 | result.setValue(Field.YEAR, "2004"); |
---|
216 | result.setValue(Field.TITLE, "Solving feature subset selection problem by a Parallel Scatter Search"); |
---|
217 | result.setValue(Field.PUBLISHER, "Elsevier"); |
---|
218 | result.setValue(Field.LANGUAGE, "English"); |
---|
219 | |
---|
220 | return result; |
---|
221 | } |
---|
222 | |
---|
223 | /** |
---|
224 | * Returns the revision string. |
---|
225 | * |
---|
226 | * @return the revision |
---|
227 | */ |
---|
228 | public String getRevision() { |
---|
229 | return RevisionUtils.extract("$Revision: 1.0$"); |
---|
230 | } |
---|
231 | |
---|
232 | public ScatterSearchV1 () { |
---|
233 | resetOptions(); |
---|
234 | } |
---|
235 | |
---|
236 | /** |
---|
237 | * Returns the tip text for this property |
---|
238 | * @return tip text for this property suitable for |
---|
239 | * displaying in the explorer/experimenter gui |
---|
240 | */ |
---|
241 | public String tresholdTipText() { |
---|
242 | return "Set the treshold that subsets most overcome to be considered as significants"; |
---|
243 | } |
---|
244 | |
---|
245 | /** |
---|
246 | * Set the treshold |
---|
247 | * |
---|
248 | * @param treshold for identifyng significant subsets |
---|
249 | */ |
---|
250 | public void setTreshold (double treshold) { |
---|
251 | m_initialThreshold = treshold; |
---|
252 | } |
---|
253 | |
---|
254 | /** |
---|
255 | * Get the treshold |
---|
256 | * |
---|
257 | * @return the treshold that subsets most overcome to be considered as significants |
---|
258 | */ |
---|
259 | public double getTreshold () { |
---|
260 | return m_initialThreshold; |
---|
261 | } |
---|
262 | |
---|
263 | |
---|
264 | /** |
---|
265 | * Returns the tip text for this property |
---|
266 | * @return tip text for this property suitable for |
---|
267 | * displaying in the explorer/experimenter gui |
---|
268 | */ |
---|
269 | public String populationSizeTipText() { |
---|
270 | return "Set the number of subset to generate in the initial Population"; |
---|
271 | } |
---|
272 | |
---|
273 | /** |
---|
274 | * Set the population size |
---|
275 | * |
---|
276 | * @param size the number of subset in the initial population |
---|
277 | */ |
---|
278 | public void setPopulationSize (int size) { |
---|
279 | m_initialPopSize = size; |
---|
280 | } |
---|
281 | |
---|
282 | /** |
---|
283 | * Get the population size |
---|
284 | * |
---|
285 | * @return the number of subsets to generate in the initial population |
---|
286 | */ |
---|
287 | public int getPopulationSize () { |
---|
288 | return m_initialPopSize; |
---|
289 | } |
---|
290 | |
---|
291 | |
---|
292 | /** |
---|
293 | * Returns the tip text for this property |
---|
294 | * @return tip text for this property suitable for |
---|
295 | * displaying in the explorer/experimenter gui |
---|
296 | */ |
---|
297 | public String combinationTipText() { |
---|
298 | return "Set the kind of combination for using it to combine ReferenceSet subsets."; |
---|
299 | } |
---|
300 | |
---|
301 | /** |
---|
302 | * Set the kind of combination |
---|
303 | * |
---|
304 | * @param c the kind of combination of the search |
---|
305 | */ |
---|
306 | public void setCombination (SelectedTag c) { |
---|
307 | if (c.getTags() == TAGS_SELECTION) { |
---|
308 | m_typeOfCombination = c.getSelectedTag().getID(); |
---|
309 | } |
---|
310 | } |
---|
311 | |
---|
312 | /** |
---|
313 | * Get the combination |
---|
314 | * |
---|
315 | * @return the kind of combination used in the Combination method |
---|
316 | */ |
---|
317 | public SelectedTag getCombination () { |
---|
318 | return new SelectedTag(m_typeOfCombination, TAGS_SELECTION); |
---|
319 | } |
---|
320 | |
---|
321 | /** |
---|
322 | * Returns the tip text for this property |
---|
323 | * @return tip text for this property suitable for |
---|
324 | * displaying in the explorer/experimenter gui |
---|
325 | */ |
---|
326 | public String seedTipText() { |
---|
327 | return "Set the random seed."; |
---|
328 | } |
---|
329 | |
---|
330 | /** |
---|
331 | * set the seed for random number generation |
---|
332 | * @param s seed value |
---|
333 | */ |
---|
334 | public void setSeed(int s) { |
---|
335 | m_seed = s; |
---|
336 | } |
---|
337 | |
---|
338 | /** |
---|
339 | * get the value of the random number generator's seed |
---|
340 | * @return the seed for random number generation |
---|
341 | */ |
---|
342 | public int getSeed() { |
---|
343 | return m_seed; |
---|
344 | } |
---|
345 | |
---|
346 | /** |
---|
347 | * Returns the tip text for this property |
---|
348 | * @return tip text for this property suitable for |
---|
349 | * displaying in the explorer/experimenter gui |
---|
350 | */ |
---|
351 | public String debugTipText() { |
---|
352 | return "Turn on verbose output for monitoring the search's progress."; |
---|
353 | } |
---|
354 | |
---|
355 | /** |
---|
356 | * Set whether verbose output should be generated. |
---|
357 | * @param d true if output is to be verbose. |
---|
358 | */ |
---|
359 | public void setDebug(boolean d) { |
---|
360 | m_debug = d; |
---|
361 | } |
---|
362 | |
---|
363 | /** |
---|
364 | * Get whether output is to be verbose |
---|
365 | * @return true if output will be verbose |
---|
366 | */ |
---|
367 | public boolean getDebug() { |
---|
368 | return m_debug; |
---|
369 | } |
---|
370 | |
---|
371 | /** |
---|
372 | * Returns an enumeration describing the available options. |
---|
373 | * @return an enumeration of all the available options. |
---|
374 | **/ |
---|
375 | public Enumeration listOptions () { |
---|
376 | Vector newVector = new Vector(6); |
---|
377 | |
---|
378 | newVector.addElement(new Option("\tSpecify the number of subsets to generate " |
---|
379 | + "\n\tin the initial population.." |
---|
380 | ,"Z",1 |
---|
381 | , "-Z <num>")); |
---|
382 | newVector.addElement(new Option("\tSpecify the treshold used for considering when a subset is significant." |
---|
383 | , "T", 1 |
---|
384 | , "-T <threshold>")); |
---|
385 | newVector.addElement(new Option("\tSpecify the kind of combiantion " |
---|
386 | + "\n\tfor using it in the combination method." |
---|
387 | , "R", 1, "-R <0 = greedy combination | 1 = reduced greedy combination >")); |
---|
388 | newVector.addElement(new Option("\tSet the random number seed." |
---|
389 | +"\n\t(default = 1)" |
---|
390 | , "S", 1, "-S <seed>")); |
---|
391 | newVector.addElement(new Option("\tVerbose output for monitoring the search.","D",0,"-D")); |
---|
392 | |
---|
393 | return newVector.elements(); |
---|
394 | } |
---|
395 | |
---|
396 | /** |
---|
397 | * Parses a given list of options. |
---|
398 | * |
---|
399 | <!-- options-start --> |
---|
400 | * Valid options are: <p> |
---|
401 | * |
---|
402 | * -Z <br> |
---|
403 | * Specify the number of subsets to generate in the initial population.<p> |
---|
404 | * |
---|
405 | * -T <start set> <br> |
---|
406 | * Specify the treshold used for considering when a subset is significant. <p> |
---|
407 | * |
---|
408 | * -R <br> |
---|
409 | * Specify the kind of combiantion. <p> |
---|
410 | * |
---|
411 | * -S <br> |
---|
412 | * Set the random number seed. |
---|
413 | * (default = 1) |
---|
414 | * |
---|
415 | * -D <br> |
---|
416 | * Verbose output for monitoring the search |
---|
417 | * (default = false) |
---|
418 | * |
---|
419 | <!-- options-end --> |
---|
420 | * |
---|
421 | * @param options the list of options as an array of strings |
---|
422 | * @exception Exception if an option is not supported |
---|
423 | * |
---|
424 | **/ |
---|
425 | public void setOptions (String[] options) |
---|
426 | throws Exception { |
---|
427 | String optionString; |
---|
428 | resetOptions(); |
---|
429 | |
---|
430 | optionString = Utils.getOption('Z', options); |
---|
431 | if (optionString.length() != 0) { |
---|
432 | setPopulationSize(Integer.parseInt(optionString)); |
---|
433 | } |
---|
434 | |
---|
435 | optionString = Utils.getOption('T', options); |
---|
436 | if (optionString.length() != 0) { |
---|
437 | setTreshold(Double.parseDouble(optionString)); |
---|
438 | } |
---|
439 | |
---|
440 | optionString = Utils.getOption('R', options); |
---|
441 | if (optionString.length() != 0) { |
---|
442 | setCombination(new SelectedTag(Integer.parseInt(optionString), |
---|
443 | TAGS_SELECTION)); |
---|
444 | } else { |
---|
445 | setCombination(new SelectedTag(COMBINATION_NOT_REDUCED, TAGS_SELECTION)); |
---|
446 | } |
---|
447 | |
---|
448 | optionString = Utils.getOption('S', options); |
---|
449 | if (optionString.length() != 0) { |
---|
450 | setSeed(Integer.parseInt(optionString)); |
---|
451 | } |
---|
452 | |
---|
453 | setDebug(Utils.getFlag('D', options)); |
---|
454 | } |
---|
455 | |
---|
456 | /** |
---|
457 | * Gets the current settings of ScatterSearchV1. |
---|
458 | * |
---|
459 | * @return an array of strings suitable for passing to setOptions() |
---|
460 | */ |
---|
461 | public String[] getOptions () { |
---|
462 | String[] options = new String[9]; |
---|
463 | int current = 0; |
---|
464 | |
---|
465 | options[current++] = "-T"; |
---|
466 | options[current++] = "" + getTreshold (); |
---|
467 | |
---|
468 | options[current++] = "-Z"; |
---|
469 | options[current++] = ""+getPopulationSize (); |
---|
470 | |
---|
471 | options[current++] = "-R"; |
---|
472 | options[current++] = ""+String.valueOf (getCombination ().getSelectedTag ().getID ()); |
---|
473 | |
---|
474 | options[current++] = "-S"; |
---|
475 | options[current++] = "" + getSeed(); |
---|
476 | |
---|
477 | if (getDebug()) |
---|
478 | options[current++] = "-D"; |
---|
479 | |
---|
480 | while (current < options.length) |
---|
481 | options[current++] = ""; |
---|
482 | |
---|
483 | return options; |
---|
484 | } |
---|
485 | |
---|
486 | /** |
---|
487 | * returns a description of the search. |
---|
488 | * @return a description of the search as a String. |
---|
489 | */ |
---|
490 | public String toString() { |
---|
491 | StringBuffer FString = new StringBuffer(); |
---|
492 | FString.append("\tScatter Search " |
---|
493 | + "\n\tInit Population: "+m_calculatedInitialPopSize); |
---|
494 | |
---|
495 | FString.append("\n\tKind of Combination: " |
---|
496 | +getCombination ().getSelectedTag ().getReadable ()); |
---|
497 | |
---|
498 | FString.append("\n\tRandom number seed: "+m_seed); |
---|
499 | |
---|
500 | FString.append("\n\tDebug: "+m_debug); |
---|
501 | |
---|
502 | FString.append("\n\tTreshold: " |
---|
503 | +Utils.doubleToString(Math.abs(getTreshold ()),8,3)+"\n"); |
---|
504 | |
---|
505 | FString.append("\tTotal number of subsets evaluated: " |
---|
506 | + m_totalEvals + "\n"); |
---|
507 | |
---|
508 | FString.append("\tMerit of best subset found: " |
---|
509 | +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n"); |
---|
510 | |
---|
511 | /* FString.append("\tTime procesing the search space: " |
---|
512 | +(double)m_processinTime/1000+" seconds\n"); */ |
---|
513 | |
---|
514 | if(m_debug) |
---|
515 | return FString.toString()+"\n\n"+m_InformationReports.toString (); |
---|
516 | |
---|
517 | return FString.toString(); |
---|
518 | } |
---|
519 | |
---|
520 | |
---|
521 | /** |
---|
522 | * Searches the attribute subset space using Scatter Search. |
---|
523 | * |
---|
524 | * @param ASEval the attribute evaluator to guide the search |
---|
525 | * @param data the training instances. |
---|
526 | * @return an array of selected attribute indexes |
---|
527 | * @exception Exception if the search can't be completed |
---|
528 | */ |
---|
529 | public int[] search(ASEvaluation ASEval, Instances data) |
---|
530 | throws Exception{ |
---|
531 | |
---|
532 | m_totalEvals = 0; |
---|
533 | m_popSize = m_initialPopSize; |
---|
534 | m_calculatedInitialPopSize = m_initialPopSize; |
---|
535 | m_treshold = m_initialThreshold; |
---|
536 | m_processinTime =System.currentTimeMillis (); |
---|
537 | m_InformationReports = new StringBuffer(); |
---|
538 | |
---|
539 | m_numAttribs =data.numAttributes (); |
---|
540 | m_classIndex =data.classIndex (); |
---|
541 | |
---|
542 | if(m_popSize<=0) { |
---|
543 | m_popSize =m_numAttribs/2; |
---|
544 | m_calculatedInitialPopSize = m_popSize; |
---|
545 | } |
---|
546 | |
---|
547 | ASEvaluator =(SubsetEvaluator)ASEval; |
---|
548 | |
---|
549 | if(!(m_treshold >= 0)){ |
---|
550 | m_treshold =calculateTreshhold(); |
---|
551 | m_totalEvals++; |
---|
552 | } |
---|
553 | |
---|
554 | m_random = new Random(m_seed); |
---|
555 | |
---|
556 | m_attributeRanking =RankEachAttribute(); |
---|
557 | |
---|
558 | CreatePopulation(m_popSize); |
---|
559 | |
---|
560 | int bestSolutions =m_popSize/4; |
---|
561 | int divSolutions =m_popSize/4; |
---|
562 | |
---|
563 | if(m_popSize < 4){ |
---|
564 | |
---|
565 | bestSolutions = m_popSize/2; |
---|
566 | divSolutions = m_popSize/2; |
---|
567 | |
---|
568 | if(m_popSize == 1) return attributeList(((Subset)m_population.get (0)).subset); |
---|
569 | } |
---|
570 | |
---|
571 | |
---|
572 | m_ReferenceSet =new ArrayList<Subset>(); |
---|
573 | |
---|
574 | for (int i = 0; i<m_population.size (); i++) { |
---|
575 | m_ReferenceSet.add (m_population.get (i)) ; |
---|
576 | } |
---|
577 | |
---|
578 | |
---|
579 | GenerateReferenceSet(m_ReferenceSet, bestSolutions, divSolutions); |
---|
580 | |
---|
581 | |
---|
582 | m_InformationReports.append ("Population: "+m_population.size ()+"\n"); |
---|
583 | m_InformationReports.append ("merit \tsubset\n"); |
---|
584 | |
---|
585 | for (int i = 0; i < m_population.size (); i++) |
---|
586 | m_InformationReports.append (printSubset (m_population.get (i))); |
---|
587 | |
---|
588 | |
---|
589 | m_ReferenceSet =m_ReferenceSet.subList (0,bestSolutions+divSolutions); |
---|
590 | |
---|
591 | |
---|
592 | /*TEST*/ |
---|
593 | m_InformationReports.append ("\nReferenceSet:"); |
---|
594 | m_InformationReports.append ("\n----------------Most Significants Solutions--------------\n"); |
---|
595 | for (int i = 0; i<m_ReferenceSet.size (); i++) { |
---|
596 | if(i ==bestSolutions) m_InformationReports.append ("----------------Most Diverses Solutions--------------\n"); |
---|
597 | m_InformationReports.append(printSubset (m_ReferenceSet.get (i))); |
---|
598 | } |
---|
599 | |
---|
600 | |
---|
601 | Subset bestTemp =new Subset(new BitSet(m_numAttribs),0); |
---|
602 | |
---|
603 | while (!(bestTemp.isEqual (m_ReferenceSet.get (0))) /*|| (m_treshold > bestTemp.merit)*/) { |
---|
604 | //while(){ |
---|
605 | CombineParents(); |
---|
606 | ImproveSolutions(); |
---|
607 | // } |
---|
608 | bestTemp =m_ReferenceSet.get (0); |
---|
609 | |
---|
610 | int numBest =m_ReferenceSet.size ()/2; |
---|
611 | int numDiverses =m_ReferenceSet.size ()/2; |
---|
612 | |
---|
613 | UpdateReferenceSet(numBest, numDiverses); |
---|
614 | m_ReferenceSet = m_ReferenceSet.subList (0,numBest+numDiverses); |
---|
615 | |
---|
616 | } |
---|
617 | |
---|
618 | m_InformationReports.append("\nLast Reference Set Updated:\n"); |
---|
619 | m_InformationReports.append ("merit \tsubset\n"); |
---|
620 | |
---|
621 | for (int i = 0; i <m_ReferenceSet.size (); i++) |
---|
622 | m_InformationReports.append (printSubset (m_ReferenceSet.get (i))); |
---|
623 | |
---|
624 | |
---|
625 | m_bestMerit =bestTemp.merit; |
---|
626 | |
---|
627 | m_processinTime =System.currentTimeMillis () -m_processinTime; |
---|
628 | |
---|
629 | return attributeList (bestTemp.subset); |
---|
630 | } |
---|
631 | |
---|
632 | /** |
---|
633 | * Generate the a ReferenceSet containing the n best solutions and the m most diverse solutions of |
---|
634 | * the initial Population. |
---|
635 | * |
---|
636 | * @param ReferenceSet the ReferenceSet for storing these solutions |
---|
637 | * @param bestSolutions the number of the most pure solutions. |
---|
638 | * @param divSolutions the number of the most diverses solutions acording to the bestSolutions. |
---|
639 | */ |
---|
640 | public void GenerateReferenceSet(List<Subset> ReferenceSet, int bestSolutions, int divSolutions){ |
---|
641 | |
---|
642 | //Sorting the Initial ReferenceSet |
---|
643 | ReferenceSet =bubbleSubsetSort (ReferenceSet); |
---|
644 | |
---|
645 | // storing all the attributes that are now in the ReferenceSet (just till bestSolutions) |
---|
646 | BitSet allBits_RefSet =getAllBits (ReferenceSet.subList (0,bestSolutions)); |
---|
647 | |
---|
648 | // for stopping when ReferenceSet.size () ==bestSolutions+divSolutions |
---|
649 | int refSetlength =bestSolutions; |
---|
650 | int total =bestSolutions+divSolutions; |
---|
651 | |
---|
652 | while (refSetlength <total) { |
---|
653 | |
---|
654 | List<Integer> aux =new ArrayList<Integer>(); |
---|
655 | |
---|
656 | for (int i =refSetlength; i <ReferenceSet.size (); i ++) { |
---|
657 | aux.add (SimetricDiference (((Subset)ReferenceSet.get (i)).clone (),allBits_RefSet)); |
---|
658 | } |
---|
659 | |
---|
660 | |
---|
661 | int mostDiv =getIndexofBiggest(aux); |
---|
662 | ReferenceSet.set(refSetlength, ReferenceSet.get (refSetlength+mostDiv)); |
---|
663 | //ReferenceSet.remove (refSetlength +mostDiv); |
---|
664 | |
---|
665 | refSetlength++; |
---|
666 | |
---|
667 | allBits_RefSet =getAllBits (ReferenceSet.subList (0,refSetlength)); |
---|
668 | } |
---|
669 | |
---|
670 | ReferenceSet =filterSubset (ReferenceSet,refSetlength); |
---|
671 | } |
---|
672 | |
---|
673 | /** |
---|
674 | * Update the ReferenceSet putting the new obtained Solutions there |
---|
675 | * |
---|
676 | * @param numBestSolutions the number of the most pure solutions. |
---|
677 | * @param numDivsSolutions the number of the most diverses solutions acording to the bestSolutions. |
---|
678 | */ |
---|
679 | public void UpdateReferenceSet(int numBestSolutions, int numDivsSolutions){ |
---|
680 | |
---|
681 | for (int i = 0; i <m_parentsCombination.size (); i++) m_ReferenceSet.add (i, m_parentsCombination.get (i)); |
---|
682 | |
---|
683 | GenerateReferenceSet (m_ReferenceSet,numBestSolutions,numDivsSolutions); |
---|
684 | } |
---|
685 | |
---|
686 | /** |
---|
687 | * Improve the solutions previously combined by adding the attributes that improve that solution |
---|
688 | * @exception Exception if there is some trouble evaluating the candidate solutions |
---|
689 | */ |
---|
690 | public void ImproveSolutions() |
---|
691 | throws Exception{ |
---|
692 | |
---|
693 | for (int i = 0; i<m_parentsCombination.size (); i++) { |
---|
694 | |
---|
695 | BitSet aux1 =(BitSet)((Subset)m_parentsCombination.get (i)).subset.clone (); |
---|
696 | List<Subset> ranking =new ArrayList<Subset>(); |
---|
697 | |
---|
698 | /* |
---|
699 | for(int j=aux1.nextClearBit (0); j<=m_numAttribs; j=aux1.nextClearBit(j+1)){ |
---|
700 | if(j ==m_classIndex)continue; |
---|
701 | |
---|
702 | BitSet aux2 =new BitSet(m_numAttribs); |
---|
703 | aux2.set (j); |
---|
704 | |
---|
705 | double merit =ASEvaluator.evaluateSubset (aux2); |
---|
706 | m_totalEvals++; |
---|
707 | |
---|
708 | ranking.add (new Subset((BitSet)aux2.clone (), merit)); |
---|
709 | } |
---|
710 | |
---|
711 | ranking =bubbleSubsetSort (ranking); |
---|
712 | */ |
---|
713 | |
---|
714 | for (int k =0; k <m_attributeRanking.size (); k ++) { |
---|
715 | Subset s1 =((Subset)m_attributeRanking.get (k)).clone (); |
---|
716 | BitSet b1 =(BitSet)s1.subset.clone (); |
---|
717 | |
---|
718 | Subset s2 =((Subset)m_parentsCombination.get (i)).clone (); |
---|
719 | BitSet b2 =(BitSet)s2.subset.clone (); |
---|
720 | |
---|
721 | if(b2.get (b1.nextSetBit (0))) continue; |
---|
722 | |
---|
723 | b2.or (b1); |
---|
724 | double newMerit =ASEvaluator.evaluateSubset (b2); |
---|
725 | m_totalEvals++; |
---|
726 | |
---|
727 | if(newMerit <= s2.merit)break; |
---|
728 | |
---|
729 | m_parentsCombination.set (i,new Subset(b2,newMerit)); |
---|
730 | } |
---|
731 | |
---|
732 | filterSubset (m_parentsCombination,m_ReferenceSet.size()); |
---|
733 | } |
---|
734 | } |
---|
735 | |
---|
736 | /** |
---|
737 | * Combine all the posible pair solutions existing in the Population |
---|
738 | * |
---|
739 | * @exception Exception if there is some trouble evaluating the new childs |
---|
740 | */ |
---|
741 | public void CombineParents() |
---|
742 | throws Exception{ |
---|
743 | |
---|
744 | m_parentsCombination =new ArrayList<Subset>(); |
---|
745 | |
---|
746 | // this two 'for' are for selecting parents in the refSet |
---|
747 | for (int i= 0; i <m_ReferenceSet.size ()-1; i ++) { |
---|
748 | for (int j= i+1; j <m_ReferenceSet.size (); j ++) { |
---|
749 | |
---|
750 | // Selecting parents |
---|
751 | Subset parent1 =m_ReferenceSet.get (i); |
---|
752 | Subset parent2 =m_ReferenceSet.get (j); |
---|
753 | |
---|
754 | // Initializing childs Intersecting parents |
---|
755 | Subset child1 = intersectSubsets (parent1, parent2); |
---|
756 | Subset child2 =child1.clone (); |
---|
757 | |
---|
758 | // Initializing childs Intersecting parents |
---|
759 | Subset simDif =simetricDif (parent1, parent2, getCombination ().getSelectedTag ().getID ()); |
---|
760 | |
---|
761 | BitSet aux =(BitSet)simDif.subset.clone (); |
---|
762 | |
---|
763 | boolean improvement =true; |
---|
764 | |
---|
765 | while (improvement) { |
---|
766 | |
---|
767 | Subset best1 =getBestgen (child1,aux); |
---|
768 | Subset best2 =getBestgen (child2,aux); |
---|
769 | |
---|
770 | if(best1 !=null || best2!=null){ |
---|
771 | |
---|
772 | if(best2 ==null){ |
---|
773 | child1 =best1.clone (); |
---|
774 | continue; |
---|
775 | } |
---|
776 | if(best1 ==null){ |
---|
777 | child2 =best2.clone (); |
---|
778 | continue; |
---|
779 | } |
---|
780 | if(best1 !=null && best2 !=null){ |
---|
781 | double merit1 =best1.merit; |
---|
782 | double merit2 =best2.merit; |
---|
783 | |
---|
784 | if(merit1 >merit2){ |
---|
785 | child1 =best1.clone (); |
---|
786 | continue; |
---|
787 | } |
---|
788 | if(merit1 <merit2){ |
---|
789 | child2 =best2.clone (); |
---|
790 | continue; |
---|
791 | } |
---|
792 | if(merit1 ==merit2){ |
---|
793 | if(best1.subset.cardinality () > best2.subset.cardinality ()){ |
---|
794 | child2 =best2.clone (); |
---|
795 | continue; |
---|
796 | } |
---|
797 | if(best1.subset.cardinality () < best2.subset.cardinality ()){ |
---|
798 | child1 =best1.clone (); |
---|
799 | continue; |
---|
800 | } |
---|
801 | if(best1.subset.cardinality () == best2.subset.cardinality ()){ |
---|
802 | double random = m_random.nextDouble (); |
---|
803 | if(random < 0.5)child1 =best1.clone (); |
---|
804 | else child2 =best2.clone (); |
---|
805 | continue; |
---|
806 | } |
---|
807 | } |
---|
808 | } |
---|
809 | |
---|
810 | }else{ |
---|
811 | m_parentsCombination.add (child1); |
---|
812 | m_parentsCombination.add (child2); |
---|
813 | improvement =false; |
---|
814 | } |
---|
815 | } |
---|
816 | } |
---|
817 | } |
---|
818 | m_parentsCombination = filterSubset (m_parentsCombination,m_ReferenceSet.size()); |
---|
819 | |
---|
820 | GenerateReferenceSet (m_parentsCombination,m_ReferenceSet.size ()/2, m_ReferenceSet.size ()/2); |
---|
821 | m_parentsCombination = m_parentsCombination.subList (0, m_ReferenceSet.size ()); |
---|
822 | |
---|
823 | } |
---|
824 | /** |
---|
825 | * Create the initial Population |
---|
826 | * |
---|
827 | * @param popSize the size of the initial population |
---|
828 | * @exception Exception if there is a trouble evaluating any solution |
---|
829 | */ |
---|
830 | public void CreatePopulation(int popSize) |
---|
831 | throws Exception{ |
---|
832 | |
---|
833 | InitPopulation(popSize); |
---|
834 | |
---|
835 | /** Delimit the best attributes from the worst*/ |
---|
836 | int segmentation =m_numAttribs/2; |
---|
837 | |
---|
838 | /*TEST*/ |
---|
839 | /* System.out.println ("AttributeRanking"); |
---|
840 | for (int i = 0; i <attributeRanking.size (); i++){ |
---|
841 | if(i ==segmentation)System.out.println ("-------------------------SEGMENTATION------------------------"); |
---|
842 | printSubset (attributeRanking.get (i)); |
---|
843 | } |
---|
844 | */ |
---|
845 | for (int i = 0; i<m_popSize; i++) { |
---|
846 | |
---|
847 | List<Subset> attributeRankingCopy = new ArrayList<Subset>(); |
---|
848 | for (int j = 0; j<m_attributeRanking.size (); j++) attributeRankingCopy.add (m_attributeRanking.get (j)); |
---|
849 | |
---|
850 | |
---|
851 | double last_evaluation =-999; |
---|
852 | double current_evaluation =0; |
---|
853 | |
---|
854 | boolean doneAnew =true; |
---|
855 | |
---|
856 | while (true) { |
---|
857 | |
---|
858 | // generate a random number in the interval[0..segmentation] |
---|
859 | int random_number = m_random.nextInt (segmentation+1) /*generateRandomNumber (segmentation)*/; |
---|
860 | |
---|
861 | if(doneAnew && i <=segmentation)random_number =i; |
---|
862 | doneAnew =false; |
---|
863 | |
---|
864 | Subset s1 =((Subset)attributeRankingCopy.get (random_number)).clone (); |
---|
865 | Subset s2 =((Subset)m_population.get (i)).clone (); |
---|
866 | |
---|
867 | |
---|
868 | // trying to add a new gen in the chromosome i of the population |
---|
869 | Subset joiners =joinSubsets (s1, s2 ); |
---|
870 | |
---|
871 | current_evaluation =joiners.merit; |
---|
872 | |
---|
873 | if(current_evaluation > last_evaluation){ |
---|
874 | m_population.set (i,joiners); |
---|
875 | last_evaluation =current_evaluation; |
---|
876 | |
---|
877 | try { |
---|
878 | attributeRankingCopy.set (random_number, attributeRankingCopy.get (segmentation+1)); |
---|
879 | attributeRankingCopy.remove (segmentation+1); |
---|
880 | }catch (IndexOutOfBoundsException ex) { |
---|
881 | attributeRankingCopy.set (random_number,new Subset(new BitSet(m_numAttribs),0)); |
---|
882 | continue; |
---|
883 | } |
---|
884 | } |
---|
885 | else{ |
---|
886 | // there's not more improvement |
---|
887 | break; |
---|
888 | } |
---|
889 | |
---|
890 | |
---|
891 | } |
---|
892 | } |
---|
893 | |
---|
894 | //m_population =bubbleSubsetSort (m_population); |
---|
895 | } |
---|
896 | |
---|
897 | |
---|
898 | /** |
---|
899 | * Rank all the attributes individually acording to their merits |
---|
900 | * |
---|
901 | * @return an ordered List of Subsets with just one attribute |
---|
902 | * @exception Exception if the evaluation can not be completed |
---|
903 | */ |
---|
904 | public List<Subset> RankEachAttribute() |
---|
905 | throws Exception{ |
---|
906 | |
---|
907 | List<Subset> result =new ArrayList<Subset>(); |
---|
908 | |
---|
909 | for (int i = 0; i<m_numAttribs; i++) { |
---|
910 | if(i==m_classIndex)continue; |
---|
911 | |
---|
912 | BitSet an_Attribute =new BitSet(m_numAttribs); |
---|
913 | an_Attribute.set (i); |
---|
914 | |
---|
915 | double merit =ASEvaluator.evaluateSubset (an_Attribute); |
---|
916 | m_totalEvals++; |
---|
917 | |
---|
918 | result.add (new Subset(an_Attribute, merit)); |
---|
919 | } |
---|
920 | |
---|
921 | return bubbleSubsetSort(result); |
---|
922 | } |
---|
923 | |
---|
924 | |
---|
925 | //.......... |
---|
926 | |
---|
927 | |
---|
928 | /** |
---|
929 | * Evaluate each gen of a BitSet inserted in a Subset and get the most significant for that Subset |
---|
930 | * |
---|
931 | * @return a new Subset with the union of subset and the best gen of gens. |
---|
932 | * in case that there's not improvement with each gen return null |
---|
933 | * @exception Exception if the evaluation of can not be completed |
---|
934 | */ |
---|
935 | public Subset getBestgen(Subset subset, BitSet gens) |
---|
936 | throws Exception{ |
---|
937 | Subset result =null; |
---|
938 | |
---|
939 | double merit1 =subset.merit; |
---|
940 | |
---|
941 | for(int i =gens.nextSetBit(0); i >=0; i =gens.nextSetBit(i+1)){ |
---|
942 | BitSet aux =(BitSet)subset.subset.clone (); |
---|
943 | |
---|
944 | if(aux.get (i))continue; |
---|
945 | aux.set (i); |
---|
946 | |
---|
947 | double merit2 =ASEvaluator.evaluateSubset (aux); |
---|
948 | m_totalEvals++; |
---|
949 | |
---|
950 | if(merit2 >merit1){ |
---|
951 | merit1 =merit2; |
---|
952 | result =new Subset(aux,merit1); |
---|
953 | } |
---|
954 | } |
---|
955 | |
---|
956 | return result; |
---|
957 | } |
---|
958 | |
---|
959 | /** |
---|
960 | * Sort a List of subsets according to their merits |
---|
961 | * |
---|
962 | * @param subsetList the subsetList to be ordered |
---|
963 | * @return a List with ordered subsets |
---|
964 | */ |
---|
965 | public List<Subset> bubbleSubsetSort(List<Subset> subsetList){ |
---|
966 | List<Subset> result =new ArrayList<Subset>(); |
---|
967 | |
---|
968 | for (int i = 0; i<subsetList.size ()-1; i++) { |
---|
969 | Subset subset1 =subsetList.get (i); |
---|
970 | double merit1 =subset1.merit; |
---|
971 | |
---|
972 | for (int j = i+1; j<subsetList.size (); j++) { |
---|
973 | Subset subset2 =subsetList.get (j); |
---|
974 | double merit2 =subset2.merit; |
---|
975 | |
---|
976 | if(merit2 > merit1){ |
---|
977 | Subset temp =subset1; |
---|
978 | |
---|
979 | subsetList.set (i,subset2); |
---|
980 | subsetList.set (j,temp); |
---|
981 | |
---|
982 | subset1 =subset2; |
---|
983 | merit1 =subset1.merit; |
---|
984 | } |
---|
985 | } |
---|
986 | } |
---|
987 | return subsetList; |
---|
988 | } |
---|
989 | |
---|
990 | |
---|
991 | /** |
---|
992 | * get the index in a List where this have the biggest number |
---|
993 | * |
---|
994 | * @param simDif the Lists of numbers for getting from them the index of the bigger |
---|
995 | * @return an index that represents where the bigest number is. |
---|
996 | */ |
---|
997 | public int getIndexofBiggest(List<Integer> simDif){ |
---|
998 | int aux =-99999; |
---|
999 | int result1 =-1; |
---|
1000 | List<Integer> equalSimDif =new ArrayList<Integer>(); |
---|
1001 | |
---|
1002 | if(simDif.size ()==0) return -1; |
---|
1003 | |
---|
1004 | for (int i = 0; i<simDif.size (); i++) { |
---|
1005 | if(simDif.get (i) >aux){ |
---|
1006 | aux =simDif.get (i); |
---|
1007 | result1 =i; |
---|
1008 | } |
---|
1009 | } |
---|
1010 | |
---|
1011 | for (int i =0; i <simDif.size (); i++) { |
---|
1012 | if(simDif.get (i) ==aux){ |
---|
1013 | equalSimDif.add (i); |
---|
1014 | } |
---|
1015 | } |
---|
1016 | |
---|
1017 | int finalResult =equalSimDif.get (m_random.nextInt (equalSimDif.size ()) /*generateRandomNumber (equalSimDif.size ()-1)*/); |
---|
1018 | |
---|
1019 | return finalResult; |
---|
1020 | } |
---|
1021 | |
---|
1022 | /** |
---|
1023 | * Save in Bitset all the gens that are in many others subsets. |
---|
1024 | * |
---|
1025 | * @param subsets the Lists of subsets for getting from them all their gens |
---|
1026 | * @return a Bitset with all the gens contained in many others subsets. |
---|
1027 | */ |
---|
1028 | public BitSet getAllBits(List<Subset> subsets){ |
---|
1029 | BitSet result =new BitSet(m_numAttribs); |
---|
1030 | |
---|
1031 | for (int i =0; i <subsets.size (); i ++) { |
---|
1032 | BitSet aux =((Subset)subsets.get (i)).clone ().subset; |
---|
1033 | |
---|
1034 | for(int j=aux.nextSetBit(0); j>=0; j=aux.nextSetBit(j+1)) { |
---|
1035 | result.set (j); |
---|
1036 | } |
---|
1037 | } |
---|
1038 | |
---|
1039 | return result; |
---|
1040 | } |
---|
1041 | |
---|
1042 | /** |
---|
1043 | * Creating space for introducing the population |
---|
1044 | * |
---|
1045 | * @param popSize the number of subset in the initial population |
---|
1046 | */ |
---|
1047 | public void InitPopulation(int popSize){ |
---|
1048 | m_population =new ArrayList<Subset>(); |
---|
1049 | for (int i = 0; i<popSize; i++)m_population.add (new Subset(new BitSet(m_numAttribs),0)); |
---|
1050 | } |
---|
1051 | |
---|
1052 | /** |
---|
1053 | * Join two subsets |
---|
1054 | * |
---|
1055 | * @param subset1 one of the subsets |
---|
1056 | * @param subset2 the other subset |
---|
1057 | * @return a new Subset that is te result of the Join |
---|
1058 | * @exception Exception if the evaluation of the subsets can not be completed |
---|
1059 | */ |
---|
1060 | public Subset joinSubsets(Subset subset1, Subset subset2) |
---|
1061 | throws Exception{ |
---|
1062 | BitSet b1 =(BitSet)subset1.subset.clone (); |
---|
1063 | BitSet b2 =(BitSet)subset2.subset.clone (); |
---|
1064 | |
---|
1065 | b1.or (b2); |
---|
1066 | |
---|
1067 | double newMerit =ASEvaluator.evaluateSubset (b1); |
---|
1068 | m_totalEvals++; |
---|
1069 | |
---|
1070 | return new Subset((BitSet)b1.clone (), newMerit); |
---|
1071 | } |
---|
1072 | |
---|
1073 | /** |
---|
1074 | * Intersects two subsets |
---|
1075 | * |
---|
1076 | * @param subset1 one of the subsets |
---|
1077 | * @param subset2 the other subset |
---|
1078 | * @return a new Subset that is te result of the intersection |
---|
1079 | * @exception Exception if the evaluation of the subsets can not be completed |
---|
1080 | */ |
---|
1081 | public Subset intersectSubsets(Subset subset1, Subset subset2) |
---|
1082 | throws Exception{ |
---|
1083 | BitSet b1 =(BitSet)subset1.subset.clone (); |
---|
1084 | BitSet b2 =(BitSet)subset2.subset.clone (); |
---|
1085 | |
---|
1086 | b1.and (b2); |
---|
1087 | |
---|
1088 | double newMerit =ASEvaluator.evaluateSubset (b1); |
---|
1089 | m_totalEvals++; |
---|
1090 | |
---|
1091 | return new Subset((BitSet)b1.clone (), newMerit); |
---|
1092 | } |
---|
1093 | |
---|
1094 | public Subset simetricDif(Subset subset1, Subset subset2, int mode) |
---|
1095 | throws Exception{ |
---|
1096 | BitSet b1 =(BitSet)subset1.subset.clone (); |
---|
1097 | BitSet b2 =(BitSet)subset2.subset.clone (); |
---|
1098 | |
---|
1099 | b1.xor (b2); |
---|
1100 | |
---|
1101 | double newMerit =ASEvaluator.evaluateSubset (b1); |
---|
1102 | m_totalEvals++; |
---|
1103 | |
---|
1104 | Subset result =new Subset((BitSet)b1.clone (), newMerit); |
---|
1105 | |
---|
1106 | if(mode == COMBINATION_REDUCED){ |
---|
1107 | |
---|
1108 | double avgAcurracy =0; |
---|
1109 | int totalSolutions =0; |
---|
1110 | List<Subset> weightVector =new ArrayList<Subset>(); |
---|
1111 | |
---|
1112 | BitSet res =result.subset; |
---|
1113 | for(int i=res.nextSetBit(0); i>=0; i=res.nextSetBit(i+1)){ |
---|
1114 | |
---|
1115 | double merits =0; |
---|
1116 | int numSolutions =0; |
---|
1117 | Subset solution =null; |
---|
1118 | |
---|
1119 | for (int j = 0; j <m_ReferenceSet.size (); j ++) { |
---|
1120 | solution =(Subset)m_ReferenceSet.get (j); |
---|
1121 | if(solution.subset.get (i)){ |
---|
1122 | merits +=solution.merit; |
---|
1123 | numSolutions ++; |
---|
1124 | } |
---|
1125 | } |
---|
1126 | BitSet b =new BitSet(m_numAttribs); |
---|
1127 | b.set (i); |
---|
1128 | Subset s =new Subset(b, merits/(double)numSolutions); |
---|
1129 | weightVector.add (s); |
---|
1130 | |
---|
1131 | avgAcurracy +=merits; |
---|
1132 | totalSolutions ++; |
---|
1133 | |
---|
1134 | } |
---|
1135 | avgAcurracy =avgAcurracy/(double)totalSolutions; |
---|
1136 | |
---|
1137 | BitSet newResult =new BitSet(m_numAttribs); |
---|
1138 | for (int i = 0; i<weightVector.size (); i++) { |
---|
1139 | Subset aux =weightVector.get (i); |
---|
1140 | if(aux.merit >=avgAcurracy){ |
---|
1141 | newResult.or (aux.subset); |
---|
1142 | } |
---|
1143 | } |
---|
1144 | double merit =ASEvaluator.evaluateSubset (newResult); |
---|
1145 | result =new Subset(newResult, merit); |
---|
1146 | |
---|
1147 | } |
---|
1148 | |
---|
1149 | return result; |
---|
1150 | } |
---|
1151 | |
---|
1152 | public int generateRandomNumber(int limit){ |
---|
1153 | |
---|
1154 | return (int)Math.round (Math.random ()*(limit+0.4)); |
---|
1155 | } |
---|
1156 | |
---|
1157 | |
---|
1158 | |
---|
1159 | /** |
---|
1160 | * Calculate the treshold of a dataSet given an evaluator |
---|
1161 | * |
---|
1162 | * @return the treshhold of the dataSet |
---|
1163 | * @exception Exception if the calculation can not be completed |
---|
1164 | */ |
---|
1165 | public double calculateTreshhold() |
---|
1166 | throws Exception{ |
---|
1167 | BitSet fullSet =new BitSet(m_numAttribs); |
---|
1168 | |
---|
1169 | for (int i= 0; i< m_numAttribs; i++) { |
---|
1170 | if(i ==m_classIndex)continue; |
---|
1171 | fullSet.set (i); |
---|
1172 | } |
---|
1173 | |
---|
1174 | return ASEvaluator.evaluateSubset (fullSet); |
---|
1175 | } |
---|
1176 | |
---|
1177 | /** |
---|
1178 | * Calculate the Simetric Diference of two subsets |
---|
1179 | * |
---|
1180 | * @return the Simetric Diference |
---|
1181 | * @exception Exception if the calculation can not be completed |
---|
1182 | */ |
---|
1183 | public int SimetricDiference(Subset subset, BitSet bitset){ |
---|
1184 | BitSet aux =subset.clone ().subset; |
---|
1185 | aux.xor (bitset); |
---|
1186 | |
---|
1187 | return aux.cardinality (); |
---|
1188 | } |
---|
1189 | |
---|
1190 | /** |
---|
1191 | * Filter a given Lis of Subsets removing the equals subsets |
---|
1192 | * @param subsetList to filter |
---|
1193 | * @param preferredSize the preferred size of the new List (if it is -1, then the filter is make it |
---|
1194 | * for all subsets, else then the filter method stops when the given preferred |
---|
1195 | * size is reached or all the subset have been filtered). |
---|
1196 | * @return a new List filtered |
---|
1197 | * @exception Exception if the calculation can not be completed |
---|
1198 | */ |
---|
1199 | public List<Subset> filterSubset(List<Subset> subsetList, int preferredSize){ |
---|
1200 | if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList; |
---|
1201 | |
---|
1202 | for (int i =0; i <subsetList.size ()-1; i ++) { |
---|
1203 | for (int j =i+1; j <subsetList.size (); j ++) { |
---|
1204 | Subset focus =subsetList.get (i); |
---|
1205 | if(focus.isEqual (subsetList.get (j))){ |
---|
1206 | subsetList.remove (j); |
---|
1207 | j--; |
---|
1208 | |
---|
1209 | if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList; |
---|
1210 | } |
---|
1211 | } |
---|
1212 | } |
---|
1213 | return subsetList; |
---|
1214 | } |
---|
1215 | //.......... |
---|
1216 | |
---|
1217 | |
---|
1218 | // Test Methods |
---|
1219 | |
---|
1220 | public String printSubset(Subset subset){ |
---|
1221 | StringBuffer bufferString = new StringBuffer(); |
---|
1222 | |
---|
1223 | if(subset == null){ |
---|
1224 | //System.out.println ("null"); |
---|
1225 | return ""; |
---|
1226 | } |
---|
1227 | |
---|
1228 | BitSet bits =subset.subset; |
---|
1229 | double merit =subset.merit; |
---|
1230 | List<Integer> indexes =new ArrayList<Integer>(); |
---|
1231 | |
---|
1232 | for (int i = 0; i<m_numAttribs; i++) { |
---|
1233 | if(bits.get (i)){ |
---|
1234 | //System.out.print ("1"); |
---|
1235 | indexes.add (i+1); |
---|
1236 | } |
---|
1237 | //else System.out.print ("0"); |
---|
1238 | } |
---|
1239 | bufferString.append (Utils.doubleToString (merit,8,5)+"\t "+indexes.toString ()+"\n"); |
---|
1240 | //System.out.print (" with a merit of: "+merit); |
---|
1241 | |
---|
1242 | return bufferString.toString (); |
---|
1243 | } |
---|
1244 | |
---|
1245 | //........ |
---|
1246 | |
---|
1247 | protected void resetOptions () { |
---|
1248 | m_popSize = -1; |
---|
1249 | m_initialPopSize = -1; |
---|
1250 | m_calculatedInitialPopSize = -1; |
---|
1251 | m_treshold = -1; |
---|
1252 | m_typeOfCombination = COMBINATION_NOT_REDUCED; |
---|
1253 | m_seed = 1; |
---|
1254 | m_debug = true; |
---|
1255 | m_totalEvals = 0; |
---|
1256 | m_bestMerit = 0; |
---|
1257 | m_processinTime = 0; |
---|
1258 | } |
---|
1259 | |
---|
1260 | /** |
---|
1261 | * converts a BitSet into a list of attribute indexes |
---|
1262 | * @param group the BitSet to convert |
---|
1263 | * @return an array of attribute indexes |
---|
1264 | **/ |
---|
1265 | public int[] attributeList (BitSet group) { |
---|
1266 | int count = 0; |
---|
1267 | |
---|
1268 | // count how many were selected |
---|
1269 | for (int i = 0; i < m_numAttribs; i++) { |
---|
1270 | if (group.get(i)) { |
---|
1271 | count++; |
---|
1272 | } |
---|
1273 | } |
---|
1274 | |
---|
1275 | int[] list = new int[count]; |
---|
1276 | count = 0; |
---|
1277 | |
---|
1278 | for (int i = 0; i < m_numAttribs; i++) { |
---|
1279 | if (group.get(i)) { |
---|
1280 | list[count++] = i; |
---|
1281 | } |
---|
1282 | } |
---|
1283 | |
---|
1284 | return list; |
---|
1285 | } |
---|
1286 | |
---|
1287 | |
---|
1288 | // Auxiliar Class for handling Chromosomes and its respective merit |
---|
1289 | |
---|
1290 | public class Subset implements Serializable { |
---|
1291 | |
---|
1292 | double merit; |
---|
1293 | BitSet subset; |
---|
1294 | |
---|
1295 | public Subset(BitSet subset, double merit){ |
---|
1296 | this.subset =(BitSet)subset.clone (); |
---|
1297 | this.merit =merit; |
---|
1298 | } |
---|
1299 | |
---|
1300 | public boolean isEqual(Subset othersubset){ |
---|
1301 | if(subset.equals (othersubset.subset))return true; |
---|
1302 | return false; |
---|
1303 | } |
---|
1304 | |
---|
1305 | public Subset clone(){ |
---|
1306 | return new Subset((BitSet)subset.clone (), merit); |
---|
1307 | } |
---|
1308 | } |
---|
1309 | //.......... |
---|
1310 | |
---|
1311 | } |
---|
1312 | |
---|