1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * RotationForest.java |
---|
19 | * Copyright (C) 2008 Juan Jose Rodriguez |
---|
20 | * Copyright (C) 2008 University of Waikato, Hamilton, New Zealand |
---|
21 | * |
---|
22 | */ |
---|
23 | |
---|
24 | |
---|
25 | package weka.classifiers.meta; |
---|
26 | |
---|
27 | import weka.classifiers.RandomizableParallelIteratedSingleClassifierEnhancer; |
---|
28 | import weka.core.Attribute; |
---|
29 | import weka.core.FastVector; |
---|
30 | import weka.core.Instance; |
---|
31 | import weka.core.DenseInstance; |
---|
32 | import weka.core.Instances; |
---|
33 | import weka.core.Option; |
---|
34 | import weka.core.OptionHandler; |
---|
35 | import weka.core.Randomizable; |
---|
36 | import weka.core.RevisionUtils; |
---|
37 | import weka.core.TechnicalInformation; |
---|
38 | import weka.core.TechnicalInformationHandler; |
---|
39 | import weka.core.Utils; |
---|
40 | import weka.core.WeightedInstancesHandler; |
---|
41 | import weka.core.TechnicalInformation.Field; |
---|
42 | import weka.core.TechnicalInformation.Type; |
---|
43 | import weka.filters.Filter; |
---|
44 | import weka.filters.unsupervised.attribute.Normalize; |
---|
45 | import weka.filters.unsupervised.attribute.PrincipalComponents; |
---|
46 | import weka.filters.unsupervised.attribute.RemoveUseless; |
---|
47 | import weka.filters.unsupervised.instance.RemovePercentage; |
---|
48 | |
---|
49 | import java.util.Enumeration; |
---|
50 | import java.util.Random; |
---|
51 | import java.util.Vector; |
---|
52 | |
---|
53 | /** |
---|
54 | <!-- globalinfo-start --> |
---|
55 | * Class for construction a Rotation Forest. Can do classification and regression depending on the base learner. <br/> |
---|
56 | * <br/> |
---|
57 | * For more information, see<br/> |
---|
58 | * <br/> |
---|
59 | * Juan J. Rodriguez, Ludmila I. Kuncheva, Carlos J. Alonso (2006). Rotation Forest: A new classifier ensemble method. IEEE Transactions on Pattern Analysis and Machine Intelligence. 28(10):1619-1630. URL http://doi.ieeecomputersociety.org/10.1109/TPAMI.2006.211. |
---|
60 | * <p/> |
---|
61 | <!-- globalinfo-end --> |
---|
62 | * |
---|
63 | <!-- technical-bibtex-start --> |
---|
64 | * BibTeX: |
---|
65 | * <pre> |
---|
66 | * @article{Rodriguez2006, |
---|
67 | * author = {Juan J. Rodriguez and Ludmila I. Kuncheva and Carlos J. Alonso}, |
---|
68 | * journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, |
---|
69 | * number = {10}, |
---|
70 | * pages = {1619-1630}, |
---|
71 | * title = {Rotation Forest: A new classifier ensemble method}, |
---|
72 | * volume = {28}, |
---|
73 | * year = {2006}, |
---|
74 | * ISSN = {0162-8828}, |
---|
75 | * URL = {http://doi.ieeecomputersociety.org/10.1109/TPAMI.2006.211} |
---|
76 | * } |
---|
77 | * </pre> |
---|
78 | * <p/> |
---|
79 | <!-- technical-bibtex-end --> |
---|
80 | * |
---|
81 | <!-- options-start --> |
---|
82 | * Valid options are: <p/> |
---|
83 | * |
---|
84 | * <pre> -N |
---|
85 | * Whether minGroup (-G) and maxGroup (-H) refer to |
---|
86 | * the number of groups or their size. |
---|
87 | * (default: false)</pre> |
---|
88 | * |
---|
89 | * <pre> -G <num> |
---|
90 | * Minimum size of a group of attributes: |
---|
91 | * if numberOfGroups is true, the minimum number |
---|
92 | * of groups. |
---|
93 | * (default: 3)</pre> |
---|
94 | * |
---|
95 | * <pre> -H <num> |
---|
96 | * Maximum size of a group of attributes: |
---|
97 | * if numberOfGroups is true, the maximum number |
---|
98 | * of groups. |
---|
99 | * (default: 3)</pre> |
---|
100 | * |
---|
101 | * <pre> -P <num> |
---|
102 | * Percentage of instances to be removed. |
---|
103 | * (default: 50)</pre> |
---|
104 | * |
---|
105 | * <pre> -F <filter specification> |
---|
106 | * Full class name of filter to use, followed |
---|
107 | * by filter options. |
---|
108 | * eg: "weka.filters.unsupervised.attribute.PrincipalComponents-R 1.0"</pre> |
---|
109 | * |
---|
110 | * <pre> -S <num> |
---|
111 | * Random number seed. |
---|
112 | * (default 1)</pre> |
---|
113 | * |
---|
114 | * <pre> -I <num> |
---|
115 | * Number of iterations. |
---|
116 | * (default 10)</pre> |
---|
117 | * |
---|
118 | * <pre> -D |
---|
119 | * If set, classifier is run in debug mode and |
---|
120 | * may output additional info to the console</pre> |
---|
121 | * |
---|
122 | * <pre> -W |
---|
123 | * Full name of base classifier. |
---|
124 | * (default: weka.classifiers.trees.J48)</pre> |
---|
125 | * |
---|
126 | * <pre> |
---|
127 | * Options specific to classifier weka.classifiers.trees.J48: |
---|
128 | * </pre> |
---|
129 | * |
---|
130 | * <pre> -U |
---|
131 | * Use unpruned tree.</pre> |
---|
132 | * |
---|
133 | * <pre> -C <pruning confidence> |
---|
134 | * Set confidence threshold for pruning. |
---|
135 | * (default 0.25)</pre> |
---|
136 | * |
---|
137 | * <pre> -M <minimum number of instances> |
---|
138 | * Set minimum number of instances per leaf. |
---|
139 | * (default 2)</pre> |
---|
140 | * |
---|
141 | * <pre> -R |
---|
142 | * Use reduced error pruning.</pre> |
---|
143 | * |
---|
144 | * <pre> -N <number of folds> |
---|
145 | * Set number of folds for reduced error |
---|
146 | * pruning. One fold is used as pruning set. |
---|
147 | * (default 3)</pre> |
---|
148 | * |
---|
149 | * <pre> -B |
---|
150 | * Use binary splits only.</pre> |
---|
151 | * |
---|
152 | * <pre> -S |
---|
153 | * Don't perform subtree raising.</pre> |
---|
154 | * |
---|
155 | * <pre> -L |
---|
156 | * Do not clean up after the tree has been built.</pre> |
---|
157 | * |
---|
158 | * <pre> -A |
---|
159 | * Laplace smoothing for predicted probabilities.</pre> |
---|
160 | * |
---|
161 | * <pre> -Q <seed> |
---|
162 | * Seed for random data shuffling (default 1).</pre> |
---|
163 | * |
---|
164 | <!-- options-end --> |
---|
165 | * |
---|
166 | * @author Juan Jose Rodriguez (jjrodriguez@ubu.es) |
---|
167 | * @version $Revision: 5987 $ |
---|
168 | */ |
---|
169 | public class RotationForest |
---|
170 | extends RandomizableParallelIteratedSingleClassifierEnhancer |
---|
171 | implements WeightedInstancesHandler, TechnicalInformationHandler { |
---|
172 | // It implements WeightedInstancesHandler because the base classifier |
---|
173 | // can implement this interface, but in this method the weights are |
---|
174 | // not used |
---|
175 | |
---|
176 | /** for serialization */ |
---|
177 | static final long serialVersionUID = -3255631880798499936L; |
---|
178 | |
---|
179 | /** The minimum size of a group */ |
---|
180 | protected int m_MinGroup = 3; |
---|
181 | |
---|
182 | /** The maximum size of a group */ |
---|
183 | protected int m_MaxGroup = 3; |
---|
184 | |
---|
185 | /** |
---|
186 | * Whether minGroup and maxGroup refer to the number of groups or their |
---|
187 | * size */ |
---|
188 | protected boolean m_NumberOfGroups = false; |
---|
189 | |
---|
190 | /** The percentage of instances to be removed */ |
---|
191 | protected int m_RemovedPercentage = 50; |
---|
192 | |
---|
193 | /** The attributes of each group */ |
---|
194 | protected int [][][] m_Groups = null; |
---|
195 | |
---|
196 | /** The type of projection filter */ |
---|
197 | protected Filter m_ProjectionFilter = null; |
---|
198 | |
---|
199 | /** The projection filters */ |
---|
200 | protected Filter [][] m_ProjectionFilters = null; |
---|
201 | |
---|
202 | /** Headers of the transformed dataset */ |
---|
203 | protected Instances [] m_Headers = null; |
---|
204 | |
---|
205 | /** Headers of the reduced datasets */ |
---|
206 | protected Instances [][] m_ReducedHeaders = null; |
---|
207 | |
---|
208 | /** Filter that remove useless attributes */ |
---|
209 | protected RemoveUseless m_RemoveUseless = null; |
---|
210 | |
---|
211 | /** Filter that normalized the attributes */ |
---|
212 | protected Normalize m_Normalize = null; |
---|
213 | |
---|
214 | /** Training data */ |
---|
215 | protected Instances m_data; |
---|
216 | |
---|
217 | protected Instances [] m_instancesOfClasses; |
---|
218 | |
---|
219 | protected Random m_random; |
---|
220 | |
---|
221 | /** |
---|
222 | * Constructor. |
---|
223 | */ |
---|
224 | public RotationForest() { |
---|
225 | |
---|
226 | m_Classifier = new weka.classifiers.trees.J48(); |
---|
227 | m_ProjectionFilter = defaultFilter(); |
---|
228 | } |
---|
229 | |
---|
230 | /** |
---|
231 | * Default projection method. |
---|
232 | */ |
---|
233 | protected Filter defaultFilter() { |
---|
234 | PrincipalComponents filter = new PrincipalComponents(); |
---|
235 | filter.setNormalize(false); |
---|
236 | filter.setVarianceCovered(1.0); |
---|
237 | return filter; |
---|
238 | } |
---|
239 | |
---|
240 | /** |
---|
241 | * Returns a string describing classifier |
---|
242 | * @return a description suitable for |
---|
243 | * displaying in the explorer/experimenter gui |
---|
244 | */ |
---|
245 | public String globalInfo() { |
---|
246 | |
---|
247 | return "Class for construction a Rotation Forest. Can do classification " |
---|
248 | + "and regression depending on the base learner. \n\n" |
---|
249 | + "For more information, see\n\n" |
---|
250 | + getTechnicalInformation().toString(); |
---|
251 | } |
---|
252 | |
---|
253 | /** |
---|
254 | * Returns an instance of a TechnicalInformation object, containing |
---|
255 | * detailed information about the technical background of this class, |
---|
256 | * e.g., paper reference or book this class is based on. |
---|
257 | * |
---|
258 | * @return the technical information about this class |
---|
259 | */ |
---|
260 | public TechnicalInformation getTechnicalInformation() { |
---|
261 | TechnicalInformation result; |
---|
262 | |
---|
263 | result = new TechnicalInformation(Type.ARTICLE); |
---|
264 | result.setValue(Field.AUTHOR, "Juan J. Rodriguez and Ludmila I. Kuncheva and Carlos J. Alonso"); |
---|
265 | result.setValue(Field.YEAR, "2006"); |
---|
266 | result.setValue(Field.TITLE, "Rotation Forest: A new classifier ensemble method"); |
---|
267 | result.setValue(Field.JOURNAL, "IEEE Transactions on Pattern Analysis and Machine Intelligence"); |
---|
268 | result.setValue(Field.VOLUME, "28"); |
---|
269 | result.setValue(Field.NUMBER, "10"); |
---|
270 | result.setValue(Field.PAGES, "1619-1630"); |
---|
271 | result.setValue(Field.ISSN, "0162-8828"); |
---|
272 | result.setValue(Field.URL, "http://doi.ieeecomputersociety.org/10.1109/TPAMI.2006.211"); |
---|
273 | |
---|
274 | return result; |
---|
275 | } |
---|
276 | |
---|
277 | /** |
---|
278 | * String describing default classifier. |
---|
279 | * |
---|
280 | * @return the default classifier classname |
---|
281 | */ |
---|
282 | protected String defaultClassifierString() { |
---|
283 | |
---|
284 | return "weka.classifiers.trees.J48"; |
---|
285 | } |
---|
286 | |
---|
287 | /** |
---|
288 | * Returns an enumeration describing the available options. |
---|
289 | * |
---|
290 | * @return an enumeration of all the available options. |
---|
291 | */ |
---|
292 | public Enumeration listOptions() { |
---|
293 | |
---|
294 | Vector newVector = new Vector(5); |
---|
295 | |
---|
296 | newVector.addElement(new Option( |
---|
297 | "\tWhether minGroup (-G) and maxGroup (-H) refer to" |
---|
298 | + "\n\tthe number of groups or their size." |
---|
299 | + "\n\t(default: false)", |
---|
300 | "N", 0, "-N")); |
---|
301 | |
---|
302 | newVector.addElement(new Option( |
---|
303 | "\tMinimum size of a group of attributes:" |
---|
304 | + "\n\t\tif numberOfGroups is true, the minimum number" |
---|
305 | + "\n\t\tof groups." |
---|
306 | + "\n\t\t(default: 3)", |
---|
307 | "G", 1, "-G <num>")); |
---|
308 | |
---|
309 | newVector.addElement(new Option( |
---|
310 | "\tMaximum size of a group of attributes:" |
---|
311 | + "\n\t\tif numberOfGroups is true, the maximum number" |
---|
312 | + "\n\t\tof groups." |
---|
313 | + "\n\t\t(default: 3)", |
---|
314 | "H", 1, "-H <num>")); |
---|
315 | |
---|
316 | newVector.addElement(new Option( |
---|
317 | "\tPercentage of instances to be removed." |
---|
318 | + "\n\t\t(default: 50)", |
---|
319 | "P", 1, "-P <num>")); |
---|
320 | |
---|
321 | newVector.addElement(new Option( |
---|
322 | "\tFull class name of filter to use, followed\n" |
---|
323 | + "\tby filter options.\n" |
---|
324 | + "\teg: \"weka.filters.unsupervised.attribute.PrincipalComponents-R 1.0\"", |
---|
325 | "F", 1, "-F <filter specification>")); |
---|
326 | |
---|
327 | Enumeration enu = super.listOptions(); |
---|
328 | while (enu.hasMoreElements()) { |
---|
329 | newVector.addElement(enu.nextElement()); |
---|
330 | } |
---|
331 | return newVector.elements(); |
---|
332 | } |
---|
333 | |
---|
334 | /** |
---|
335 | * Parses a given list of options. <p/> |
---|
336 | * |
---|
337 | <!-- options-start --> |
---|
338 | * Valid options are: <p/> |
---|
339 | * |
---|
340 | * <pre> -N |
---|
341 | * Whether minGroup (-G) and maxGroup (-H) refer to |
---|
342 | * the number of groups or their size. |
---|
343 | * (default: false)</pre> |
---|
344 | * |
---|
345 | * <pre> -G <num> |
---|
346 | * Minimum size of a group of attributes: |
---|
347 | * if numberOfGroups is true, the minimum number |
---|
348 | * of groups. |
---|
349 | * (default: 3)</pre> |
---|
350 | * |
---|
351 | * <pre> -H <num> |
---|
352 | * Maximum size of a group of attributes: |
---|
353 | * if numberOfGroups is true, the maximum number |
---|
354 | * of groups. |
---|
355 | * (default: 3)</pre> |
---|
356 | * |
---|
357 | * <pre> -P <num> |
---|
358 | * Percentage of instances to be removed. |
---|
359 | * (default: 50)</pre> |
---|
360 | * |
---|
361 | * <pre> -F <filter specification> |
---|
362 | * Full class name of filter to use, followed |
---|
363 | * by filter options. |
---|
364 | * eg: "weka.filters.unsupervised.attribute.PrincipalComponents-R 1.0"</pre> |
---|
365 | * |
---|
366 | * <pre> -S <num> |
---|
367 | * Random number seed. |
---|
368 | * (default 1)</pre> |
---|
369 | * |
---|
370 | * <pre> -I <num> |
---|
371 | * Number of iterations. |
---|
372 | * (default 10)</pre> |
---|
373 | * |
---|
374 | * <pre> -D |
---|
375 | * If set, classifier is run in debug mode and |
---|
376 | * may output additional info to the console</pre> |
---|
377 | * |
---|
378 | * <pre> -W |
---|
379 | * Full name of base classifier. |
---|
380 | * (default: weka.classifiers.trees.J48)</pre> |
---|
381 | * |
---|
382 | * <pre> |
---|
383 | * Options specific to classifier weka.classifiers.trees.J48: |
---|
384 | * </pre> |
---|
385 | * |
---|
386 | * <pre> -U |
---|
387 | * Use unpruned tree.</pre> |
---|
388 | * |
---|
389 | * <pre> -C <pruning confidence> |
---|
390 | * Set confidence threshold for pruning. |
---|
391 | * (default 0.25)</pre> |
---|
392 | * |
---|
393 | * <pre> -M <minimum number of instances> |
---|
394 | * Set minimum number of instances per leaf. |
---|
395 | * (default 2)</pre> |
---|
396 | * |
---|
397 | * <pre> -R |
---|
398 | * Use reduced error pruning.</pre> |
---|
399 | * |
---|
400 | * <pre> -N <number of folds> |
---|
401 | * Set number of folds for reduced error |
---|
402 | * pruning. One fold is used as pruning set. |
---|
403 | * (default 3)</pre> |
---|
404 | * |
---|
405 | * <pre> -B |
---|
406 | * Use binary splits only.</pre> |
---|
407 | * |
---|
408 | * <pre> -S |
---|
409 | * Don't perform subtree raising.</pre> |
---|
410 | * |
---|
411 | * <pre> -L |
---|
412 | * Do not clean up after the tree has been built.</pre> |
---|
413 | * |
---|
414 | * <pre> -A |
---|
415 | * Laplace smoothing for predicted probabilities.</pre> |
---|
416 | * |
---|
417 | * <pre> -Q <seed> |
---|
418 | * Seed for random data shuffling (default 1).</pre> |
---|
419 | * |
---|
420 | <!-- options-end --> |
---|
421 | * |
---|
422 | * @param options the list of options as an array of strings |
---|
423 | * @throws Exception if an option is not supported |
---|
424 | */ |
---|
425 | public void setOptions(String[] options) throws Exception { |
---|
426 | |
---|
427 | /* Taken from FilteredClassifier */ |
---|
428 | String filterString = Utils.getOption('F', options); |
---|
429 | if (filterString.length() > 0) { |
---|
430 | String [] filterSpec = Utils.splitOptions(filterString); |
---|
431 | if (filterSpec.length == 0) { |
---|
432 | throw new IllegalArgumentException("Invalid filter specification string"); |
---|
433 | } |
---|
434 | String filterName = filterSpec[0]; |
---|
435 | filterSpec[0] = ""; |
---|
436 | setProjectionFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec)); |
---|
437 | } else { |
---|
438 | setProjectionFilter(defaultFilter()); |
---|
439 | } |
---|
440 | |
---|
441 | String tmpStr; |
---|
442 | |
---|
443 | tmpStr = Utils.getOption('G', options); |
---|
444 | if (tmpStr.length() != 0) |
---|
445 | setMinGroup(Integer.parseInt(tmpStr)); |
---|
446 | else |
---|
447 | setMinGroup(3); |
---|
448 | |
---|
449 | tmpStr = Utils.getOption('H', options); |
---|
450 | if (tmpStr.length() != 0) |
---|
451 | setMaxGroup(Integer.parseInt(tmpStr)); |
---|
452 | else |
---|
453 | setMaxGroup(3); |
---|
454 | |
---|
455 | tmpStr = Utils.getOption('P', options); |
---|
456 | if (tmpStr.length() != 0) |
---|
457 | setRemovedPercentage(Integer.parseInt(tmpStr)); |
---|
458 | else |
---|
459 | setRemovedPercentage(50); |
---|
460 | |
---|
461 | setNumberOfGroups(Utils.getFlag('N', options)); |
---|
462 | |
---|
463 | super.setOptions(options); |
---|
464 | } |
---|
465 | |
---|
466 | /** |
---|
467 | * Gets the current settings of the Classifier. |
---|
468 | * |
---|
469 | * @return an array of strings suitable for passing to setOptions |
---|
470 | */ |
---|
471 | public String [] getOptions() { |
---|
472 | |
---|
473 | String [] superOptions = super.getOptions(); |
---|
474 | String [] options = new String [superOptions.length + 9]; |
---|
475 | |
---|
476 | int current = 0; |
---|
477 | |
---|
478 | if (getNumberOfGroups()) { |
---|
479 | options[current++] = "-N"; |
---|
480 | } |
---|
481 | |
---|
482 | options[current++] = "-G"; |
---|
483 | options[current++] = "" + getMinGroup(); |
---|
484 | |
---|
485 | options[current++] = "-H"; |
---|
486 | options[current++] = "" + getMaxGroup(); |
---|
487 | |
---|
488 | options[current++] = "-P"; |
---|
489 | options[current++] = "" + getRemovedPercentage(); |
---|
490 | |
---|
491 | options[current++] = "-F"; |
---|
492 | options[current++] = getProjectionFilterSpec(); |
---|
493 | |
---|
494 | System.arraycopy(superOptions, 0, options, current, |
---|
495 | superOptions.length); |
---|
496 | |
---|
497 | current += superOptions.length; |
---|
498 | while (current < options.length) { |
---|
499 | options[current++] = ""; |
---|
500 | } |
---|
501 | return options; |
---|
502 | } |
---|
503 | |
---|
504 | /** |
---|
505 | * Returns the tip text for this property |
---|
506 | * @return tip text for this property suitable for |
---|
507 | * displaying in the explorer/experimenter gui |
---|
508 | */ |
---|
509 | public String numberOfGroupsTipText() { |
---|
510 | return "Whether minGroup and maxGroup refer to the number of groups or their size."; |
---|
511 | } |
---|
512 | |
---|
513 | /** |
---|
514 | * Set whether minGroup and maxGroup refer to the number of groups or their |
---|
515 | * size |
---|
516 | * |
---|
517 | * @param numberOfGroups whether minGroup and maxGroup refer to the number |
---|
518 | * of groups or their size |
---|
519 | */ |
---|
520 | public void setNumberOfGroups(boolean numberOfGroups) { |
---|
521 | |
---|
522 | m_NumberOfGroups = numberOfGroups; |
---|
523 | } |
---|
524 | |
---|
525 | /** |
---|
526 | * Get whether minGroup and maxGroup refer to the number of groups or their |
---|
527 | * size |
---|
528 | * |
---|
529 | * @return whether minGroup and maxGroup refer to the number of groups or |
---|
530 | * their size |
---|
531 | */ |
---|
532 | public boolean getNumberOfGroups() { |
---|
533 | |
---|
534 | return m_NumberOfGroups; |
---|
535 | } |
---|
536 | |
---|
537 | /** |
---|
538 | * Returns the tip text for this property |
---|
539 | * @return tip text for this property suitable for displaying in the |
---|
540 | * explorer/experimenter gui |
---|
541 | */ |
---|
542 | public String minGroupTipText() { |
---|
543 | return "Minimum size of a group (if numberOfGrups is true, the minimum number of groups."; |
---|
544 | } |
---|
545 | |
---|
546 | /** |
---|
547 | * Sets the minimum size of a group. |
---|
548 | * |
---|
549 | * @param minGroup the minimum value. |
---|
550 | * of attributes. |
---|
551 | */ |
---|
552 | public void setMinGroup( int minGroup ) throws IllegalArgumentException { |
---|
553 | |
---|
554 | if( minGroup <= 0 ) |
---|
555 | throw new IllegalArgumentException( "MinGroup has to be positive." ); |
---|
556 | m_MinGroup = minGroup; |
---|
557 | } |
---|
558 | |
---|
559 | /** |
---|
560 | * Gets the minimum size of a group. |
---|
561 | * |
---|
562 | * @return the minimum value. |
---|
563 | */ |
---|
564 | public int getMinGroup() { |
---|
565 | return m_MinGroup; |
---|
566 | } |
---|
567 | |
---|
568 | /** |
---|
569 | * Returns the tip text for this property |
---|
570 | * @return tip text for this property suitable for |
---|
571 | * displaying in the explorer/experimenter gui |
---|
572 | */ |
---|
573 | public String maxGroupTipText() { |
---|
574 | return "Maximum size of a group (if numberOfGrups is true, the maximum number of groups."; |
---|
575 | } |
---|
576 | |
---|
577 | /** |
---|
578 | * Sets the maximum size of a group. |
---|
579 | * |
---|
580 | * @param maxGroup the maximum value. |
---|
581 | * of attributes. |
---|
582 | */ |
---|
583 | public void setMaxGroup( int maxGroup ) throws IllegalArgumentException { |
---|
584 | |
---|
585 | if( maxGroup <= 0 ) |
---|
586 | throw new IllegalArgumentException( "MaxGroup has to be positive." ); |
---|
587 | m_MaxGroup = maxGroup; |
---|
588 | } |
---|
589 | |
---|
590 | /** |
---|
591 | * Gets the maximum size of a group. |
---|
592 | * |
---|
593 | * @return the maximum value. |
---|
594 | */ |
---|
595 | public int getMaxGroup() { |
---|
596 | return m_MaxGroup; |
---|
597 | } |
---|
598 | |
---|
599 | /** |
---|
600 | * Returns the tip text for this property |
---|
601 | * @return tip text for this property suitable for |
---|
602 | * displaying in the explorer/experimenter gui |
---|
603 | */ |
---|
604 | public String removedPercentageTipText() { |
---|
605 | return "The percentage of instances to be removed."; |
---|
606 | } |
---|
607 | |
---|
608 | /** |
---|
609 | * Sets the percentage of instance to be removed |
---|
610 | * |
---|
611 | * @param removedPercentage the percentage. |
---|
612 | */ |
---|
613 | public void setRemovedPercentage( int removedPercentage ) throws IllegalArgumentException { |
---|
614 | |
---|
615 | if( removedPercentage < 0 ) |
---|
616 | throw new IllegalArgumentException( "RemovedPercentage has to be >=0." ); |
---|
617 | if( removedPercentage >= 100 ) |
---|
618 | throw new IllegalArgumentException( "RemovedPercentage has to be <100." ); |
---|
619 | |
---|
620 | m_RemovedPercentage = removedPercentage; |
---|
621 | } |
---|
622 | |
---|
623 | /** |
---|
624 | * Gets the percentage of instances to be removed |
---|
625 | * |
---|
626 | * @return the percentage. |
---|
627 | */ |
---|
628 | public int getRemovedPercentage() { |
---|
629 | return m_RemovedPercentage; |
---|
630 | } |
---|
631 | |
---|
632 | /** |
---|
633 | * Returns the tip text for this property |
---|
634 | * @return tip text for this property suitable for |
---|
635 | * displaying in the explorer/experimenter gui |
---|
636 | */ |
---|
637 | public String projectionFilterTipText() { |
---|
638 | return "The filter used to project the data (e.g., PrincipalComponents)."; |
---|
639 | } |
---|
640 | |
---|
641 | /** |
---|
642 | * Sets the filter used to project the data. |
---|
643 | * |
---|
644 | * @param projectionFilter the filter. |
---|
645 | */ |
---|
646 | public void setProjectionFilter( Filter projectionFilter ) { |
---|
647 | |
---|
648 | m_ProjectionFilter = projectionFilter; |
---|
649 | } |
---|
650 | |
---|
651 | /** |
---|
652 | * Gets the filter used to project the data. |
---|
653 | * |
---|
654 | * @return the filter. |
---|
655 | */ |
---|
656 | public Filter getProjectionFilter() { |
---|
657 | return m_ProjectionFilter; |
---|
658 | } |
---|
659 | |
---|
660 | /** |
---|
661 | * Gets the filter specification string, which contains the class name of |
---|
662 | * the filter and any options to the filter |
---|
663 | * |
---|
664 | * @return the filter string. |
---|
665 | */ |
---|
666 | /* Taken from FilteredClassifier */ |
---|
667 | protected String getProjectionFilterSpec() { |
---|
668 | |
---|
669 | Filter c = getProjectionFilter(); |
---|
670 | if (c instanceof OptionHandler) { |
---|
671 | return c.getClass().getName() + " " |
---|
672 | + Utils.joinOptions(((OptionHandler)c).getOptions()); |
---|
673 | } |
---|
674 | return c.getClass().getName(); |
---|
675 | } |
---|
676 | |
---|
677 | /** |
---|
678 | * Returns description of the Rotation Forest classifier. |
---|
679 | * |
---|
680 | * @return description of the Rotation Forest classifier as a string |
---|
681 | */ |
---|
682 | public String toString() { |
---|
683 | |
---|
684 | if (m_Classifiers == null) { |
---|
685 | return "RotationForest: No model built yet."; |
---|
686 | } |
---|
687 | StringBuffer text = new StringBuffer(); |
---|
688 | text.append("All the base classifiers: \n\n"); |
---|
689 | for (int i = 0; i < m_Classifiers.length; i++) |
---|
690 | text.append(m_Classifiers[i].toString() + "\n\n"); |
---|
691 | |
---|
692 | return text.toString(); |
---|
693 | } |
---|
694 | |
---|
695 | /** |
---|
696 | * Returns the revision string. |
---|
697 | * |
---|
698 | * @return the revision |
---|
699 | */ |
---|
700 | public String getRevision() { |
---|
701 | return RevisionUtils.extract("$Revision: 5987 $"); |
---|
702 | } |
---|
703 | |
---|
704 | protected class ClassifierWrapper extends weka.classifiers.AbstractClassifier { |
---|
705 | |
---|
706 | /** For serialization */ |
---|
707 | private static final long serialVersionUID = 2327175798869994435L; |
---|
708 | |
---|
709 | protected weka.classifiers.Classifier m_wrappedClassifier; |
---|
710 | protected int m_classifierNumber; |
---|
711 | |
---|
712 | public ClassifierWrapper(weka.classifiers.Classifier classifier, int classifierNumber) { |
---|
713 | super(); |
---|
714 | |
---|
715 | m_wrappedClassifier = classifier; |
---|
716 | m_classifierNumber = classifierNumber; |
---|
717 | } |
---|
718 | |
---|
719 | public void buildClassifier(Instances data) throws Exception { |
---|
720 | m_ReducedHeaders[m_classifierNumber] = new Instances[ m_Groups[m_classifierNumber].length ]; |
---|
721 | FastVector transformedAttributes = new FastVector( m_data.numAttributes() ); |
---|
722 | |
---|
723 | // Construction of the dataset for each group of attributes |
---|
724 | for( int j = 0; j < m_Groups[ m_classifierNumber ].length; j++ ) { |
---|
725 | FastVector fv = new FastVector( m_Groups[m_classifierNumber][j].length + 1 ); |
---|
726 | for( int k = 0; k < m_Groups[m_classifierNumber][j].length; k++ ) { |
---|
727 | fv.addElement( m_data.attribute( m_Groups[m_classifierNumber][j][k] ).copy() ); |
---|
728 | } |
---|
729 | fv.addElement( m_data.classAttribute( ).copy() ); |
---|
730 | Instances dataSubSet = new Instances( "rotated-" + m_classifierNumber + "-" + j + "-", |
---|
731 | fv, 0); |
---|
732 | dataSubSet.setClassIndex( dataSubSet.numAttributes() - 1 ); |
---|
733 | |
---|
734 | // Select instances for the dataset |
---|
735 | m_ReducedHeaders[m_classifierNumber][j] = new Instances( dataSubSet, 0 ); |
---|
736 | boolean [] selectedClasses = selectClasses( m_instancesOfClasses.length, |
---|
737 | m_random ); |
---|
738 | for( int c = 0; c < selectedClasses.length; c++ ) { |
---|
739 | if( !selectedClasses[c] ) |
---|
740 | continue; |
---|
741 | Enumeration enu = m_instancesOfClasses[c].enumerateInstances(); |
---|
742 | while( enu.hasMoreElements() ) { |
---|
743 | Instance instance = (Instance)enu.nextElement(); |
---|
744 | Instance newInstance = new DenseInstance(dataSubSet.numAttributes()); |
---|
745 | newInstance.setDataset( dataSubSet ); |
---|
746 | for( int k = 0; k < m_Groups[m_classifierNumber][j].length; k++ ) { |
---|
747 | newInstance.setValue( k, instance.value( m_Groups[m_classifierNumber][j][k] ) ); |
---|
748 | } |
---|
749 | newInstance.setClassValue( instance.classValue( ) ); |
---|
750 | dataSubSet.add( newInstance ); |
---|
751 | } |
---|
752 | } |
---|
753 | |
---|
754 | dataSubSet.randomize(m_random); |
---|
755 | // Remove a percentage of the instances |
---|
756 | Instances originalDataSubSet = dataSubSet; |
---|
757 | dataSubSet.randomize(m_random); |
---|
758 | RemovePercentage rp = new RemovePercentage(); |
---|
759 | rp.setPercentage( m_RemovedPercentage ); |
---|
760 | rp.setInputFormat( dataSubSet ); |
---|
761 | dataSubSet = Filter.useFilter( dataSubSet, rp ); |
---|
762 | if( dataSubSet.numInstances() < 2 ) { |
---|
763 | dataSubSet = originalDataSubSet; |
---|
764 | } |
---|
765 | |
---|
766 | // Project de data |
---|
767 | m_ProjectionFilters[m_classifierNumber][j].setInputFormat( dataSubSet ); |
---|
768 | Instances projectedData = null; |
---|
769 | do { |
---|
770 | try { |
---|
771 | projectedData = Filter.useFilter( dataSubSet, |
---|
772 | m_ProjectionFilters[m_classifierNumber][j] ); |
---|
773 | } catch ( Exception e ) { |
---|
774 | // The data could not be projected, we add some random instances |
---|
775 | addRandomInstances( dataSubSet, 10, m_random ); |
---|
776 | } |
---|
777 | } while( projectedData == null ); |
---|
778 | |
---|
779 | // Include the projected attributes in the attributes of the |
---|
780 | // transformed dataset |
---|
781 | for( int a = 0; a < projectedData.numAttributes() - 1; a++ ) { |
---|
782 | transformedAttributes.addElement( projectedData.attribute(a).copy()); |
---|
783 | } |
---|
784 | } |
---|
785 | |
---|
786 | transformedAttributes.addElement( m_data.classAttribute().copy() ); |
---|
787 | Instances transformedData = new Instances( "rotated-" + m_classifierNumber + "-", |
---|
788 | transformedAttributes, 0 ); |
---|
789 | transformedData.setClassIndex( transformedData.numAttributes() - 1 ); |
---|
790 | m_Headers[ m_classifierNumber ] = new Instances( transformedData, 0 ); |
---|
791 | |
---|
792 | // Project all the training data |
---|
793 | Enumeration enu = m_data.enumerateInstances(); |
---|
794 | while( enu.hasMoreElements() ) { |
---|
795 | Instance instance = (Instance)enu.nextElement(); |
---|
796 | Instance newInstance = convertInstance( instance, m_classifierNumber ); |
---|
797 | transformedData.add( newInstance ); |
---|
798 | } |
---|
799 | |
---|
800 | // Build the base classifier |
---|
801 | if (m_wrappedClassifier instanceof Randomizable) { |
---|
802 | ((Randomizable) m_wrappedClassifier).setSeed(m_random.nextInt()); |
---|
803 | } |
---|
804 | m_wrappedClassifier.buildClassifier( transformedData ); |
---|
805 | } |
---|
806 | |
---|
807 | public double classifierInstance(Instance instance) throws Exception { |
---|
808 | return m_wrappedClassifier.classifyInstance(instance); |
---|
809 | } |
---|
810 | |
---|
811 | public double[] distributionForInstance(Instance instance) throws Exception { |
---|
812 | return m_wrappedClassifier.distributionForInstance(instance); |
---|
813 | } |
---|
814 | |
---|
815 | public String toString() { |
---|
816 | return m_wrappedClassifier.toString(); |
---|
817 | } |
---|
818 | } |
---|
819 | |
---|
820 | protected Instances getTrainingSet(int iteration) throws Exception { |
---|
821 | |
---|
822 | // The wrapped base classifiers' buildClassifier method creates the |
---|
823 | // transformed training data |
---|
824 | return m_data; |
---|
825 | } |
---|
826 | |
---|
827 | /** |
---|
828 | * builds the classifier. |
---|
829 | * |
---|
830 | * @param data the training data to be used for generating the |
---|
831 | * classifier. |
---|
832 | * @throws Exception if the classifier could not be built successfully |
---|
833 | */ |
---|
834 | public void buildClassifier(Instances data) throws Exception { |
---|
835 | |
---|
836 | // can classifier handle the data? |
---|
837 | getCapabilities().testWithFail(data); |
---|
838 | |
---|
839 | m_data = new Instances( data ); |
---|
840 | super.buildClassifier(m_data); |
---|
841 | |
---|
842 | // Wrap up the base classifiers |
---|
843 | for (int i = 0; i < m_Classifiers.length; i++) { |
---|
844 | ClassifierWrapper cw = new ClassifierWrapper(m_Classifiers[i], i); |
---|
845 | |
---|
846 | m_Classifiers[i] = cw; |
---|
847 | } |
---|
848 | |
---|
849 | checkMinMax(m_data); |
---|
850 | |
---|
851 | if( m_data.numInstances() > 0 ) { |
---|
852 | // This function fails if there are 0 instances |
---|
853 | m_random = m_data.getRandomNumberGenerator(m_Seed); |
---|
854 | } |
---|
855 | else { |
---|
856 | m_random = new Random(m_Seed); |
---|
857 | } |
---|
858 | |
---|
859 | m_RemoveUseless = new RemoveUseless(); |
---|
860 | m_RemoveUseless.setInputFormat(m_data); |
---|
861 | m_data = Filter.useFilter(data, m_RemoveUseless); |
---|
862 | |
---|
863 | m_Normalize = new Normalize(); |
---|
864 | m_Normalize.setInputFormat(m_data); |
---|
865 | m_data = Filter.useFilter(m_data, m_Normalize); |
---|
866 | |
---|
867 | if(m_NumberOfGroups) { |
---|
868 | generateGroupsFromNumbers(m_data, m_random); |
---|
869 | } |
---|
870 | else { |
---|
871 | generateGroupsFromSizes(m_data, m_random); |
---|
872 | } |
---|
873 | |
---|
874 | m_ProjectionFilters = new Filter[m_Groups.length][]; |
---|
875 | for(int i = 0; i < m_ProjectionFilters.length; i++ ) { |
---|
876 | m_ProjectionFilters[i] = Filter.makeCopies( m_ProjectionFilter, |
---|
877 | m_Groups[i].length ); |
---|
878 | } |
---|
879 | |
---|
880 | int numClasses = m_data.numClasses(); |
---|
881 | |
---|
882 | m_instancesOfClasses = new Instances[numClasses + 1]; |
---|
883 | if( m_data.classAttribute().isNumeric() ) { |
---|
884 | m_instancesOfClasses = new Instances[numClasses]; |
---|
885 | m_instancesOfClasses[0] = m_data; |
---|
886 | } |
---|
887 | else { |
---|
888 | m_instancesOfClasses = new Instances[numClasses+1]; |
---|
889 | for( int i = 0; i < m_instancesOfClasses.length; i++ ) { |
---|
890 | m_instancesOfClasses[ i ] = new Instances( m_data, 0 ); |
---|
891 | } |
---|
892 | Enumeration enu = m_data.enumerateInstances(); |
---|
893 | while( enu.hasMoreElements() ) { |
---|
894 | Instance instance = (Instance)enu.nextElement(); |
---|
895 | if( instance.classIsMissing() ) { |
---|
896 | m_instancesOfClasses[numClasses].add( instance ); |
---|
897 | } |
---|
898 | else { |
---|
899 | int c = (int)instance.classValue(); |
---|
900 | m_instancesOfClasses[c].add( instance ); |
---|
901 | } |
---|
902 | } |
---|
903 | // If there are not instances with a missing class, we do not need to |
---|
904 | // consider them |
---|
905 | if( m_instancesOfClasses[numClasses].numInstances() == 0 ) { |
---|
906 | Instances [] tmp = m_instancesOfClasses; |
---|
907 | m_instancesOfClasses = new Instances[ numClasses ]; |
---|
908 | System.arraycopy( tmp, 0, m_instancesOfClasses, 0, numClasses ); |
---|
909 | } |
---|
910 | } |
---|
911 | |
---|
912 | // These arrays keep the information of the transformed data set |
---|
913 | m_Headers = new Instances[ m_Classifiers.length ]; |
---|
914 | m_ReducedHeaders = new Instances[ m_Classifiers.length ][]; |
---|
915 | |
---|
916 | buildClassifiers(); |
---|
917 | |
---|
918 | if(m_Debug){ |
---|
919 | printGroups(); |
---|
920 | } |
---|
921 | |
---|
922 | // save memory |
---|
923 | m_data = null; |
---|
924 | m_instancesOfClasses = null; |
---|
925 | m_random = null; |
---|
926 | } |
---|
927 | |
---|
928 | /** |
---|
929 | * Adds random instances to the dataset. |
---|
930 | * |
---|
931 | * @param dataset the dataset |
---|
932 | * @param numInstances the number of instances |
---|
933 | * @param random a random number generator |
---|
934 | */ |
---|
935 | protected void addRandomInstances( Instances dataset, int numInstances, |
---|
936 | Random random ) { |
---|
937 | int n = dataset.numAttributes(); |
---|
938 | double [] v = new double[ n ]; |
---|
939 | for( int i = 0; i < numInstances; i++ ) { |
---|
940 | for( int j = 0; j < n; j++ ) { |
---|
941 | Attribute att = dataset.attribute( j ); |
---|
942 | if( att.isNumeric() ) { |
---|
943 | v[ j ] = random.nextDouble(); |
---|
944 | } |
---|
945 | else if ( att.isNominal() ) { |
---|
946 | v[ j ] = random.nextInt( att.numValues() ); |
---|
947 | } |
---|
948 | } |
---|
949 | dataset.add( new DenseInstance( 1, v ) ); |
---|
950 | } |
---|
951 | } |
---|
952 | |
---|
953 | /** |
---|
954 | * Checks m_MinGroup and m_MaxGroup |
---|
955 | * |
---|
956 | * @param data the dataset |
---|
957 | */ |
---|
958 | protected void checkMinMax(Instances data) { |
---|
959 | if( m_MinGroup > m_MaxGroup ) { |
---|
960 | int tmp = m_MaxGroup; |
---|
961 | m_MaxGroup = m_MinGroup; |
---|
962 | m_MinGroup = tmp; |
---|
963 | } |
---|
964 | |
---|
965 | int n = data.numAttributes(); |
---|
966 | if( m_MaxGroup >= n ) |
---|
967 | m_MaxGroup = n - 1; |
---|
968 | if( m_MinGroup >= n ) |
---|
969 | m_MinGroup = n - 1; |
---|
970 | } |
---|
971 | |
---|
972 | /** |
---|
973 | * Selects a non-empty subset of the classes |
---|
974 | * |
---|
975 | * @param numClasses the number of classes |
---|
976 | * @param random the random number generator. |
---|
977 | * @return a random subset of classes |
---|
978 | */ |
---|
979 | protected boolean [] selectClasses( int numClasses, Random random ) { |
---|
980 | |
---|
981 | int numSelected = 0; |
---|
982 | boolean selected[] = new boolean[ numClasses ]; |
---|
983 | |
---|
984 | for( int i = 0; i < selected.length; i++ ) { |
---|
985 | if(random.nextBoolean()) { |
---|
986 | selected[i] = true; |
---|
987 | numSelected++; |
---|
988 | } |
---|
989 | } |
---|
990 | if( numSelected == 0 ) { |
---|
991 | selected[random.nextInt( selected.length )] = true; |
---|
992 | } |
---|
993 | return selected; |
---|
994 | } |
---|
995 | |
---|
996 | /** |
---|
997 | * generates the groups of attributes, given their minimum and maximum |
---|
998 | * sizes. |
---|
999 | * |
---|
1000 | * @param data the training data to be used for generating the |
---|
1001 | * groups. |
---|
1002 | * @param random the random number generator. |
---|
1003 | */ |
---|
1004 | protected void generateGroupsFromSizes(Instances data, Random random) { |
---|
1005 | m_Groups = new int[m_Classifiers.length][][]; |
---|
1006 | for( int i = 0; i < m_Classifiers.length; i++ ) { |
---|
1007 | int [] permutation = attributesPermutation(data.numAttributes(), |
---|
1008 | data.classIndex(), random); |
---|
1009 | |
---|
1010 | // The number of groups that have a given size |
---|
1011 | int [] numGroupsOfSize = new int[m_MaxGroup - m_MinGroup + 1]; |
---|
1012 | |
---|
1013 | int numAttributes = 0; |
---|
1014 | int numGroups; |
---|
1015 | |
---|
1016 | // Select the size of each group |
---|
1017 | for( numGroups = 0; numAttributes < permutation.length; numGroups++ ) { |
---|
1018 | int n = random.nextInt( numGroupsOfSize.length ); |
---|
1019 | numGroupsOfSize[n]++; |
---|
1020 | numAttributes += m_MinGroup + n; |
---|
1021 | } |
---|
1022 | |
---|
1023 | m_Groups[i] = new int[numGroups][]; |
---|
1024 | int currentAttribute = 0; |
---|
1025 | int currentSize = 0; |
---|
1026 | for( int j = 0; j < numGroups; j++ ) { |
---|
1027 | while( numGroupsOfSize[ currentSize ] == 0 ) |
---|
1028 | currentSize++; |
---|
1029 | numGroupsOfSize[ currentSize ]--; |
---|
1030 | int n = m_MinGroup + currentSize; |
---|
1031 | m_Groups[i][j] = new int[n]; |
---|
1032 | for( int k = 0; k < n; k++ ) { |
---|
1033 | if( currentAttribute < permutation.length ) |
---|
1034 | m_Groups[i][j][k] = permutation[ currentAttribute ]; |
---|
1035 | else |
---|
1036 | // For the last group, it can be necessary to reuse some attributes |
---|
1037 | m_Groups[i][j][k] = permutation[ random.nextInt( |
---|
1038 | permutation.length ) ]; |
---|
1039 | currentAttribute++; |
---|
1040 | } |
---|
1041 | } |
---|
1042 | } |
---|
1043 | } |
---|
1044 | |
---|
1045 | /** |
---|
1046 | * generates the groups of attributes, given their minimum and maximum |
---|
1047 | * numbers. |
---|
1048 | * |
---|
1049 | * @param data the training data to be used for generating the |
---|
1050 | * groups. |
---|
1051 | * @param random the random number generator. |
---|
1052 | */ |
---|
1053 | protected void generateGroupsFromNumbers(Instances data, Random random) { |
---|
1054 | m_Groups = new int[m_Classifiers.length][][]; |
---|
1055 | for( int i = 0; i < m_Classifiers.length; i++ ) { |
---|
1056 | int [] permutation = attributesPermutation(data.numAttributes(), |
---|
1057 | data.classIndex(), random); |
---|
1058 | int numGroups = m_MinGroup + random.nextInt(m_MaxGroup - m_MinGroup + 1); |
---|
1059 | m_Groups[i] = new int[numGroups][]; |
---|
1060 | int groupSize = permutation.length / numGroups; |
---|
1061 | |
---|
1062 | // Some groups will have an additional attribute |
---|
1063 | int numBiggerGroups = permutation.length % numGroups; |
---|
1064 | |
---|
1065 | // Distribute the attributes in the groups |
---|
1066 | int currentAttribute = 0; |
---|
1067 | for( int j = 0; j < numGroups; j++ ) { |
---|
1068 | if( j < numBiggerGroups ) { |
---|
1069 | m_Groups[i][j] = new int[groupSize + 1]; |
---|
1070 | } |
---|
1071 | else { |
---|
1072 | m_Groups[i][j] = new int[groupSize]; |
---|
1073 | } |
---|
1074 | for( int k = 0; k < m_Groups[i][j].length; k++ ) { |
---|
1075 | m_Groups[i][j][k] = permutation[currentAttribute++]; |
---|
1076 | } |
---|
1077 | } |
---|
1078 | } |
---|
1079 | } |
---|
1080 | |
---|
1081 | /** |
---|
1082 | * generates a permutation of the attributes. |
---|
1083 | * |
---|
1084 | * @param numAttributes the number of attributes. |
---|
1085 | * @param classAttributes the index of the class attribute. |
---|
1086 | * @param random the random number generator. |
---|
1087 | * @return a permutation of the attributes |
---|
1088 | */ |
---|
1089 | protected int [] attributesPermutation(int numAttributes, int classAttribute, |
---|
1090 | Random random) { |
---|
1091 | int [] permutation = new int[numAttributes-1]; |
---|
1092 | int i = 0; |
---|
1093 | for(; i < classAttribute; i++){ |
---|
1094 | permutation[i] = i; |
---|
1095 | } |
---|
1096 | for(; i < permutation.length; i++){ |
---|
1097 | permutation[i] = i + 1; |
---|
1098 | } |
---|
1099 | |
---|
1100 | permute( permutation, random ); |
---|
1101 | |
---|
1102 | return permutation; |
---|
1103 | } |
---|
1104 | |
---|
1105 | /** |
---|
1106 | * permutes the elements of a given array. |
---|
1107 | * |
---|
1108 | * @param v the array to permute |
---|
1109 | * @param random the random number generator. |
---|
1110 | */ |
---|
1111 | protected void permute( int v[], Random random ) { |
---|
1112 | |
---|
1113 | for(int i = v.length - 1; i > 0; i-- ) { |
---|
1114 | int j = random.nextInt( i + 1 ); |
---|
1115 | if( i != j ) { |
---|
1116 | int tmp = v[i]; |
---|
1117 | v[i] = v[j]; |
---|
1118 | v[j] = tmp; |
---|
1119 | } |
---|
1120 | } |
---|
1121 | } |
---|
1122 | |
---|
1123 | /** |
---|
1124 | * prints the groups. |
---|
1125 | */ |
---|
1126 | protected void printGroups( ) { |
---|
1127 | for( int i = 0; i < m_Groups.length; i++ ) { |
---|
1128 | for( int j = 0; j < m_Groups[i].length; j++ ) { |
---|
1129 | System.err.print( "( " ); |
---|
1130 | for( int k = 0; k < m_Groups[i][j].length; k++ ) { |
---|
1131 | System.err.print( m_Groups[i][j][k] ); |
---|
1132 | System.err.print( " " ); |
---|
1133 | } |
---|
1134 | System.err.print( ") " ); |
---|
1135 | } |
---|
1136 | System.err.println( ); |
---|
1137 | } |
---|
1138 | } |
---|
1139 | |
---|
1140 | /** |
---|
1141 | * Transforms an instance for the i-th classifier. |
---|
1142 | * |
---|
1143 | * @param instance the instance to be transformed |
---|
1144 | * @param i the base classifier number |
---|
1145 | * @return the transformed instance |
---|
1146 | * @throws Exception if the instance can't be converted successfully |
---|
1147 | */ |
---|
1148 | protected Instance convertInstance( Instance instance, int i ) |
---|
1149 | throws Exception { |
---|
1150 | Instance newInstance = new DenseInstance( m_Headers[ i ].numAttributes( ) ); |
---|
1151 | newInstance.setDataset( m_Headers[ i ] ); |
---|
1152 | int currentAttribute = 0; |
---|
1153 | |
---|
1154 | // Project the data for each group |
---|
1155 | for( int j = 0; j < m_Groups[i].length; j++ ) { |
---|
1156 | Instance auxInstance = new DenseInstance( m_Groups[i][j].length + 1 ); |
---|
1157 | int k; |
---|
1158 | for( k = 0; k < m_Groups[i][j].length; k++ ) { |
---|
1159 | auxInstance.setValue( k, instance.value( m_Groups[i][j][k] ) ); |
---|
1160 | } |
---|
1161 | auxInstance.setValue( k, instance.classValue( ) ); |
---|
1162 | auxInstance.setDataset( m_ReducedHeaders[ i ][ j ] ); |
---|
1163 | m_ProjectionFilters[i][j].input( auxInstance ); |
---|
1164 | auxInstance = m_ProjectionFilters[i][j].output( ); |
---|
1165 | m_ProjectionFilters[i][j].batchFinished(); |
---|
1166 | for( int a = 0; a < auxInstance.numAttributes() - 1; a++ ) { |
---|
1167 | newInstance.setValue( currentAttribute++, auxInstance.value( a ) ); |
---|
1168 | } |
---|
1169 | } |
---|
1170 | |
---|
1171 | newInstance.setClassValue( instance.classValue() ); |
---|
1172 | return newInstance; |
---|
1173 | } |
---|
1174 | |
---|
1175 | /** |
---|
1176 | * Calculates the class membership probabilities for the given test |
---|
1177 | * instance. |
---|
1178 | * |
---|
1179 | * @param instance the instance to be classified |
---|
1180 | * @return preedicted class probability distribution |
---|
1181 | * @throws Exception if distribution can't be computed successfully |
---|
1182 | */ |
---|
1183 | public double[] distributionForInstance(Instance instance) throws Exception { |
---|
1184 | |
---|
1185 | m_RemoveUseless.input(instance); |
---|
1186 | instance =m_RemoveUseless.output(); |
---|
1187 | m_RemoveUseless.batchFinished(); |
---|
1188 | |
---|
1189 | m_Normalize.input(instance); |
---|
1190 | instance =m_Normalize.output(); |
---|
1191 | m_Normalize.batchFinished(); |
---|
1192 | |
---|
1193 | double [] sums = new double [instance.numClasses()], newProbs; |
---|
1194 | |
---|
1195 | for (int i = 0; i < m_Classifiers.length; i++) { |
---|
1196 | Instance convertedInstance = convertInstance(instance, i); |
---|
1197 | if (instance.classAttribute().isNumeric() == true) { |
---|
1198 | sums[0] += m_Classifiers[i].classifyInstance(convertedInstance); |
---|
1199 | } else { |
---|
1200 | newProbs = m_Classifiers[i].distributionForInstance(convertedInstance); |
---|
1201 | for (int j = 0; j < newProbs.length; j++) |
---|
1202 | sums[j] += newProbs[j]; |
---|
1203 | } |
---|
1204 | } |
---|
1205 | if (instance.classAttribute().isNumeric() == true) { |
---|
1206 | sums[0] /= (double)m_NumIterations; |
---|
1207 | return sums; |
---|
1208 | } else if (Utils.eq(Utils.sum(sums), 0)) { |
---|
1209 | return sums; |
---|
1210 | } else { |
---|
1211 | Utils.normalize(sums); |
---|
1212 | return sums; |
---|
1213 | } |
---|
1214 | } |
---|
1215 | |
---|
1216 | /** |
---|
1217 | * Main method for testing this class. |
---|
1218 | * |
---|
1219 | * @param argv the options |
---|
1220 | */ |
---|
1221 | public static void main(String [] argv) { |
---|
1222 | runClassifier(new RotationForest(), argv); |
---|
1223 | } |
---|
1224 | |
---|
1225 | } |
---|
1226 | |
---|