/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MultiBoostAB.java
 *
 * MultiBoosting is an extension to the highly successful AdaBoost
 * technique for forming decision committees. MultiBoosting can be
 * viewed as combining AdaBoost with wagging. It is able to harness
 * both AdaBoost's high bias and variance reduction with wagging's
 * superior variance reduction. Using C4.5 as the base learning
 * algorithm, MultiBoosting is demonstrated to produce decision
 * committees with lower error than either AdaBoost or wagging
 * significantly more often than the reverse over a large
 * representative cross-section of UCI data sets. It offers the
 * further advantage over AdaBoost of suiting parallel execution.
 *
 * For more information, refer to:
 <!-- technical-plaintext-start -->
 * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
 <!-- technical-plaintext-end -->
 *
 * Originally based on AdaBoostM1.java
 *
 * http://www.cm.deakin.edu.au/webb
 *
 * School of Computing and Mathematics
 * Deakin University
 * Geelong, Vic, 3217, Australia
 * Copyright (C) 2001 Deakin University
 *
 */

package weka.classifiers.meta;

import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Class for boosting a classifier using the MultiBoosting method.<br/>
 * <br/>
 * MultiBoosting is an extension to the highly successful AdaBoost technique for forming decision committees. MultiBoosting can be viewed as combining AdaBoost with wagging. It is able to harness both AdaBoost's high bias and variance reduction with wagging's superior variance reduction. Using C4.5 as the base learning algorithm, MultiBoosting is demonstrated to produce decision committees with lower error than either AdaBoost or wagging significantly more often than the reverse over a large representative cross-section of UCI data sets. It offers the further advantage over AdaBoost of suiting parallel execution.<br/>
 * <br/>
 * For more information, see<br/>
 * <br/>
 * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @article{Webb2000,
 *    address = {Boston},
 *    author = {Geoffrey I. Webb},
 *    journal = {Machine Learning},
 *    number = {No.2},
 *    publisher = {Kluwer Academic Publishers},
 *    title = {MultiBoosting: A Technique for Combining Boosting and Wagging},
 *    volume = {Vol.40},
 *    year = {2000}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C <num>
 *  Number of sub-committees. (Default 3)</pre>
 *
 * <pre> -P <num>
 *  Percentage of weight mass to base training on.
 *  (default 100, reduce to around 90 to speed up)</pre>
 *
 * <pre> -Q
 *  Use resampling for boosting.</pre>
 *
 * <pre> -S <num>
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -I <num>
 *  Number of iterations.
 *  (default 10)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.trees.DecisionStump)</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.trees.DecisionStump:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * Options after -- are passed to the designated classifier.<p>
 *
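 * A minimal programmatic usage sketch (the ARFF path and the choice of
 * J48 as base learner are illustrative placeholders, not defaults of
 * this class):
 * <pre>
 * Instances data = new Instances(new java.io.BufferedReader(
 *     new java.io.FileReader("/path/to/data.arff")));
 * data.setClassIndex(data.numAttributes() - 1);
 * MultiBoostAB mb = new MultiBoostAB();
 * mb.setClassifier(new weka.classifiers.trees.J48());
 * mb.setNumIterations(10);  // -I
 * mb.setNumSubCmtys(3);     // -C
 * mb.buildClassifier(data);
 * </pre>
 * <p>
 *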
 * @author Shane Butler (sbutle@deakin.edu.au)
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.16 $
 */
public class MultiBoostAB
  extends AdaBoostM1
  implements TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -6681619178187935148L;

  /** The number of sub-committees to use */
  protected int m_NumSubCmtys = 3;

  /** Random number generator */
  protected Random m_Random = null;

  /**
   * Returns a string describing this classifier.
   *
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "Class for boosting a classifier using the MultiBoosting method.\n\n"
      + "MultiBoosting is an extension to the highly successful AdaBoost "
      + "technique for forming decision committees. MultiBoosting can be "
      + "viewed as combining AdaBoost with wagging. It is able to harness "
      + "both AdaBoost's high bias and variance reduction with wagging's "
      + "superior variance reduction. Using C4.5 as the base learning "
      + "algorithm, MultiBoosting is demonstrated to produce decision "
      + "committees with lower error than either AdaBoost or wagging "
      + "significantly more often than the reverse over a large "
      + "representative cross-section of UCI data sets. It offers the "
      + "further advantage over AdaBoost of suiting parallel execution.\n\n"
      + "For more information, see\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "Geoffrey I. Webb");
    result.setValue(Field.YEAR, "2000");
    result.setValue(Field.TITLE, "MultiBoosting: A Technique for Combining Boosting and Wagging");
    result.setValue(Field.JOURNAL, "Machine Learning");
    result.setValue(Field.VOLUME, "Vol.40");
    result.setValue(Field.NUMBER, "No.2");
    result.setValue(Field.PUBLISHER, "Kluwer Academic Publishers");
    result.setValue(Field.ADDRESS, "Boston");

    return result;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {

    Enumeration enu = super.listOptions();
    Vector vec = new Vector(1);

    vec.addElement(new Option(
        "\tNumber of sub-committees. (Default 3)",
        "C", 1, "-C <num>"));
    while (enu.hasMoreElements()) {
      vec.addElement(enu.nextElement());
    }
    return vec.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -C <num>
   *  Number of sub-committees. (Default 3)</pre>
   *
   * <pre> -P <num>
   *  Percentage of weight mass to base training on.
   *  (default 100, reduce to around 90 to speed up)</pre>
   *
   * <pre> -Q
   *  Use resampling for boosting.</pre>
   *
   * <pre> -S <num>
   *  Random number seed.
   *  (default 1)</pre>
   *
   * <pre> -I <num>
   *  Number of iterations.
   *  (default 10)</pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -W
   *  Full name of base classifier.
   *  (default: weka.classifiers.trees.DecisionStump)</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.trees.DecisionStump:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   <!-- options-end -->
   *
   * Options after -- are passed to the designated classifier.<p>
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String subcmtyString = Utils.getOption('C', options);
    if (subcmtyString.length() != 0) {
      setNumSubCmtys(Integer.parseInt(subcmtyString));
    } else {
      setNumSubCmtys(3);
    }

    super.setOptions(options);
  }

  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] ops = super.getOptions();
    String [] options = new String[ops.length + 2];
    options[0] = "-C"; options[1] = "" + getNumSubCmtys();
    System.arraycopy(ops, 0, options, 2, ops.length);
    return options;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numSubCmtysTipText() {
    return "Sets the (approximate) number of subcommittees.";
  }

  /**
   * Set the number of sub-committees to use.
   *
   * @param subc the number of sub-committees
   */
  public void setNumSubCmtys(int subc) {

    m_NumSubCmtys = subc;
  }

  /**
   * Get the number of sub-committees to use.
   *
   * @return the number of sub-committees
   */
  public int getNumSubCmtys() {

    return m_NumSubCmtys;
  }

  /**
   * Method for building this classifier.
   *
   * @param training the data to train with
   * @throws Exception if the training fails
   */
  public void buildClassifier(Instances training) throws Exception {

    // the wagging resets in setWeights() draw from this generator
    m_Random = new Random(m_Seed);

    super.buildClassifier(training);

    // release the generator once boosting has finished
    m_Random = null;
  }

  /**
   * Sets the weights for the next iteration.
   *
   * @param training the data to train with
   * @param reweight the reweighting factor
   * @throws Exception in case of an error
   */
  protected void setWeights(Instances training, double reweight)
    throws Exception {

    int subCmtySize = m_Classifiers.length / m_NumSubCmtys;

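    // Integer division gives the target sub-committee size; with the
    // defaults (10 iterations, 3 sub-committees) subCmtySize is 3, so the
    // wagging reset below fires after iterations 2, 5 and 8 (0-based),
    // partitioning the run into separate AdaBoost sub-committees.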
    if ((m_NumIterationsPerformed + 1) % subCmtySize == 0) {

      if (getDebug())
        System.err.println(m_NumIterationsPerformed + " " + subCmtySize);

      double oldSumOfWeights = training.sumOfWeights();

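      // Drawing -log(U) with U uniform on (0,1) yields a standard
      // exponential variate, the continuous approximation to Poisson
      // sampling that wagging uses; the 9999/10000 scaling keeps the
      // argument of the logarithm below 1, so every new weight is
      // strictly positive.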
      // Randomly set the weights of the training instances using the
      // continuous Poisson distribution
      for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight( - Math.log((m_Random.nextDouble() * 9999) / 10000) );
      }

      // Renormalise the weights to the original total weight mass
      double sumProbs = training.sumOfWeights();
      for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() * oldSumOfWeights / sumProbs);
      }
    } else {
      super.setWeights(training, reweight);
    }
  }

  /**
   * Returns description of the boosted classifier.
   *
   * @return description of the boosted classifier as a string
   */
  public String toString() {

    // only ZeroR model?
    if (m_ZeroR != null) {
      StringBuffer buf = new StringBuffer();
      buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
      buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
      buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
      buf.append(m_ZeroR.toString());
      return buf.toString();
    }

    StringBuffer text = new StringBuffer();

    if (m_NumIterations == 0) {
      text.append("MultiBoostAB: No model built yet.\n");
    } else if (m_NumIterations == 1) {
      text.append("MultiBoostAB: No boosting possible, one classifier used!\n");
      text.append(m_Classifiers[0].toString() + "\n");
    } else {
      text.append("MultiBoostAB: Base classifiers and their weights: \n\n");
      for (int i = 0; i < m_NumIterations; i++) {
        if ((m_Classifiers != null) && (m_Classifiers[i] != null)) {
          text.append(m_Classifiers[i].toString() + "\n\n");
          text.append("Weight: " + Utils.roundDouble(m_Betas[i], 2) + "\n\n");
        }
        else {
          text.append("not yet initialized!\n\n");
        }
      }
      text.append("Number of performed iterations: " + m_NumIterations + "\n");
    }

    return text.toString();
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.16 $");
  }

  /**
   * Main method for testing this class.
   *
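   * A typical invocation from the standard Weka command line (the
   * dataset path and the choice of J48 are illustrative placeholders):
   * <pre>
   * java weka.classifiers.meta.MultiBoostAB -t /path/to/data.arff \
   *     -C 3 -I 10 -W weka.classifiers.trees.J48
   * </pre>
   *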
   * @param argv the options
   */
  public static void main(String [] argv) {
    runClassifier(new MultiBoostAB(), argv);
  }
}