Context Navigation

source: src/main/java/weka/classifiers/meta/MultiBoostAB.java @ 16

Last change on this file since 16 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 13.2 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* MultiBoostAB.java
19	*
20	* MultiBoosting is an extension to the highly successful AdaBoost
21	* technique for forming decision committees. MultiBoosting can be
22	* viewed as combining AdaBoost with wagging. It is able to harness
23	* both AdaBoost's high bias and variance reduction with wagging's
24	* superior variance reduction. Using C4.5 as the base learning
25	* algorithm, Multi-boosting is demonstrated to produce decision
26	* committees with lower error than either AdaBoost or wagging
27	* significantly more often than the reverse over a large
28	* representative cross-section of UCI data sets. It offers the
29	* further advantage over AdaBoost of suiting parallel execution.
30	*
31	* For more info refer to :
32	<!-- technical-plaintext-start -->
33	* Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
34	<!-- technical-plaintext-end -->
35	*
36	* Originally based on AdaBoostM1.java
37	*
38	* http://www.cm.deakin.edu.au/webb
39	*
40	* School of Computing and Mathematics
41	* Deakin University
42	* Geelong, Vic, 3217, Australia
43	* Copyright (C) 2001 Deakin University
44	*
45	*/
46
47	package weka.classifiers.meta;
48
49	import weka.core.Instances;
50	import weka.core.Option;
51	import weka.core.RevisionUtils;
52	import weka.core.TechnicalInformation;
53	import weka.core.TechnicalInformationHandler;
54	import weka.core.Utils;
55	import weka.core.TechnicalInformation.Field;
56	import weka.core.TechnicalInformation.Type;
57
58	import java.util.Enumeration;
59	import java.util.Random;
60	import java.util.Vector;
61
62	/**
63	<!-- globalinfo-start -->
64	* Class for boosting a classifier using the MultiBoosting method.<br/>
65	* <br/>
66	* MultiBoosting is an extension to the highly successful AdaBoost technique for forming decision committees. MultiBoosting can be viewed as combining AdaBoost with wagging. It is able to harness both AdaBoost's high bias and variance reduction with wagging's superior variance reduction. Using C4.5 as the base learning algorithm, Multi-boosting is demonstrated to produce decision committees with lower error than either AdaBoost or wagging significantly more often than the reverse over a large representative cross-section of UCI data sets. It offers the further advantage over AdaBoost of suiting parallel execution.<br/>
67	* <br/>
68	* For more information, see<br/>
69	* <br/>
70	* Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
71	* <p/>
72	<!-- globalinfo-end -->
73	*
74	<!-- technical-bibtex-start -->
75	* BibTeX:
76	* <pre>
77	* @article{Webb2000,
78	* address = {Boston},
79	* author = {Geoffrey I. Webb},
80	* journal = {Machine Learning},
81	* number = {No.2},
82	* publisher = {Kluwer Academic Publishers},
83	* title = {MultiBoosting: A Technique for Combining Boosting and Wagging},
84	* volume = {Vol.40},
85	* year = {2000}
86	* }
87	* </pre>
88	* <p/>
89	<!-- technical-bibtex-end -->
90	*
91	<!-- options-start -->
92	* Valid options are: <p/>
93	*
94	* <pre> -C <num>
95	* Number of sub-committees. (Default 3)</pre>
96	*
97	* <pre> -P <num>
98	* Percentage of weight mass to base training on.
99	* (default 100, reduce to around 90 speed up)</pre>
100	*
101	* <pre> -Q
102	* Use resampling for boosting.</pre>
103	*
104	* <pre> -S <num>
105	* Random number seed.
106	* (default 1)</pre>
107	*
108	* <pre> -I <num>
109	* Number of iterations.
110	* (default 10)</pre>
111	*
112	* <pre> -D
113	* If set, classifier is run in debug mode and
114	* may output additional info to the console</pre>
115	*
116	* <pre> -W
117	* Full name of base classifier.
118	* (default: weka.classifiers.trees.DecisionStump)</pre>
119	*
120	* <pre>
121	* Options specific to classifier weka.classifiers.trees.DecisionStump:
122	* </pre>
123	*
124	* <pre> -D
125	* If set, classifier is run in debug mode and
126	* may output additional info to the console</pre>
127	*
128	<!-- options-end -->
129	*
130	* Options after -- are passed to the designated classifier.<p>
131	*
132	* @author Shane Butler (sbutle@deakin.edu.au)
133	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
134	* @author Len Trigg (trigg@cs.waikato.ac.nz)
135	* @version $Revision: 1.16 $
136	*/
137	public class MultiBoostAB
138	extends AdaBoostM1
139	implements TechnicalInformationHandler {
140
141	/** for serialization */
142	static final long serialVersionUID = -6681619178187935148L;
143
144	/** The number of sub-committees to use */
145	protected int m_NumSubCmtys = 3;
146
147	/** Random number generator */
148	protected Random m_Random = null;
149
150	/**
151	* Returns a string describing classifier
152	* @return a description suitable for
153	* displaying in the explorer/experimenter gui
154	*/
155	public String globalInfo() {
156
157	return "Class for boosting a classifier using the MultiBoosting method.\n\n"
158	+ "MultiBoosting is an extension to the highly successful AdaBoost "
159	+ "technique for forming decision committees. MultiBoosting can be "
160	+ "viewed as combining AdaBoost with wagging. It is able to harness "
161	+ "both AdaBoost's high bias and variance reduction with wagging's "
162	+ "superior variance reduction. Using C4.5 as the base learning "
163	+ "algorithm, Multi-boosting is demonstrated to produce decision "
164	+ "committees with lower error than either AdaBoost or wagging "
165	+ "significantly more often than the reverse over a large "
166	+ "representative cross-section of UCI data sets. It offers the "
167	+ "further advantage over AdaBoost of suiting parallel execution.\n\n"
168	+ "For more information, see\n\n"
169	+ getTechnicalInformation().toString();
170	}
171
172	/**
173	* Returns an instance of a TechnicalInformation object, containing
174	* detailed information about the technical background of this class,
175	* e.g., paper reference or book this class is based on.
176	*
177	* @return the technical information about this class
178	*/
179	public TechnicalInformation getTechnicalInformation() {
180	TechnicalInformation result;
181
182	result = new TechnicalInformation(Type.ARTICLE);
183	result.setValue(Field.AUTHOR, "Geoffrey I. Webb");
184	result.setValue(Field.YEAR, "2000");
185	result.setValue(Field.TITLE, "MultiBoosting: A Technique for Combining Boosting and Wagging");
186	result.setValue(Field.JOURNAL, "Machine Learning");
187	result.setValue(Field.VOLUME, "Vol.40");
188	result.setValue(Field.NUMBER, "No.2");
189	result.setValue(Field.PUBLISHER, "Kluwer Academic Publishers");
190	result.setValue(Field.ADDRESS, "Boston");
191
192	return result;
193	}
194
195	/**
196	* Returns an enumeration describing the available options
197	*
198	* @return an enumeration of all the available options
199	*/
200	public Enumeration listOptions() {
201
202	Enumeration enu = super.listOptions();
203	Vector vec = new Vector(1);
204
205	vec.addElement(new Option(
206	"\tNumber of sub-committees. (Default 3)",
207	"C", 1, "-C <num>"));
208	while (enu.hasMoreElements()) {
209	vec.addElement(enu.nextElement());
210	}
211	return vec.elements();
212	}
213
214	/**
215	* Parses a given list of options. <p/>
216	*
217	<!-- options-start -->
218	* Valid options are: <p/>
219	*
220	* <pre> -C <num>
221	* Number of sub-committees. (Default 3)</pre>
222	*
223	* <pre> -P <num>
224	* Percentage of weight mass to base training on.
225	* (default 100, reduce to around 90 speed up)</pre>
226	*
227	* <pre> -Q
228	* Use resampling for boosting.</pre>
229	*
230	* <pre> -S <num>
231	* Random number seed.
232	* (default 1)</pre>
233	*
234	* <pre> -I <num>
235	* Number of iterations.
236	* (default 10)</pre>
237	*
238	* <pre> -D
239	* If set, classifier is run in debug mode and
240	* may output additional info to the console</pre>
241	*
242	* <pre> -W
243	* Full name of base classifier.
244	* (default: weka.classifiers.trees.DecisionStump)</pre>
245	*
246	* <pre>
247	* Options specific to classifier weka.classifiers.trees.DecisionStump:
248	* </pre>
249	*
250	* <pre> -D
251	* If set, classifier is run in debug mode and
252	* may output additional info to the console</pre>
253	*
254	<!-- options-end -->
255	*
256	* Options after -- are passed to the designated classifier.<p>
257	*
258	* @param options the list of options as an array of strings
259	* @throws Exception if an option is not supported
260	*/
261	public void setOptions(String[] options) throws Exception {
262
263	String subcmtyString = Utils.getOption('C', options);
264	if (subcmtyString.length() != 0) {
265	setNumSubCmtys(Integer.parseInt(subcmtyString));
266	} else {
267	setNumSubCmtys(3);
268	}
269
270	super.setOptions(options);
271	}
272
273	/**
274	* Gets the current settings of the Classifier.
275	*
276	* @return an array of strings suitable for passing to setOptions
277	*/
278	public String [] getOptions() {
279
280	String [] ops = super.getOptions();
281	String [] options = new String[ops.length + 2];
282	options[0] = "-C"; options[1] = "" + getNumSubCmtys();
283	System.arraycopy(ops, 0, options, 2, ops.length);
284	return options;
285	}
286
287	/**
288	* Returns the tip text for this property
289	* @return tip text for this property suitable for
290	* displaying in the explorer/experimenter gui
291	*/
292	public String numSubCmtysTipText() {
293	return "Sets the (approximate) number of subcommittees.";
294	}
295
296
297	/**
298	* Set the number of sub committees to use
299	*
300	* @param subc the number of sub committees
301	*/
302	public void setNumSubCmtys(int subc) {
303
304	m_NumSubCmtys = subc;
305	}
306
307	/**
308	* Get the number of sub committees to use
309	*
310	* @return the seed for resampling
311	*/
312	public int getNumSubCmtys() {
313
314	return m_NumSubCmtys;
315	}
316
317	/**
318	* Method for building this classifier.
319	*
320	* @param training the data to train with
321	* @throws Exception if the training fails
322	*/
323	public void buildClassifier(Instances training) throws Exception {
324
325	m_Random = new Random(m_Seed);
326
327	super.buildClassifier(training);
328
329	m_Random = null;
330	}
331
332	/**
333	* Sets the weights for the next iteration.
334	*
335	* @param training the data to train with
336	* @param reweight the reweighting factor
337	* @throws Exception in case of an error
338	*/
339	protected void setWeights(Instances training, double reweight)
340	throws Exception {
341
342	int subCmtySize = m_Classifiers.length / m_NumSubCmtys;
343
344	if ((m_NumIterationsPerformed + 1) % subCmtySize == 0) {
345
346	if (getDebug())
347	System.err.println(m_NumIterationsPerformed + " " + subCmtySize);
348
349	double oldSumOfWeights = training.sumOfWeights();
350
351	// Randomly set the weights of the training instances to the poisson distributon
352	for (int i = 0; i < training.numInstances(); i++) {
353	training.instance(i).setWeight( - Math.log((m_Random.nextDouble() * 9999) / 10000) );
354	}
355
356	// Renormailise weights
357	double sumProbs = training.sumOfWeights();
358	for (int i = 0; i < training.numInstances(); i++) {
359	training.instance(i).setWeight(training.instance(i).weight() * oldSumOfWeights / sumProbs);
360	}
361	} else {
362	super.setWeights(training, reweight);
363	}
364	}
365
366	/**
367	* Returns description of the boosted classifier.
368	*
369	* @return description of the boosted classifier as a string
370	*/
371	public String toString() {
372
373	// only ZeroR model?
374	if (m_ZeroR != null) {
375	StringBuffer buf = new StringBuffer();
376	buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
377	buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
378	buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
379	buf.append(m_ZeroR.toString());
380	return buf.toString();
381	}
382
383	StringBuffer text = new StringBuffer();
384
385	if (m_NumIterations == 0) {
386	text.append("MultiBoostAB: No model built yet.\n");
387	} else if (m_NumIterations == 1) {
388	text.append("MultiBoostAB: No boosting possible, one classifier used!\n");
389	text.append(m_Classifiers[0].toString() + "\n");
390	} else {
391	text.append("MultiBoostAB: Base classifiers and their weights: \n\n");
392	for (int i = 0; i < m_NumIterations ; i++) {
393	if ( (m_Classifiers != null) && (m_Classifiers[i] != null) ) {
394	text.append(m_Classifiers[i].toString() + "\n\n");
395	text.append("Weight: " + Utils.roundDouble(m_Betas[i], 2) + "\n\n");
396	}
397	else {
398	text.append("not yet initialized!\n\n");
399	}
400	}
401	text.append("Number of performed Iterations: " + m_NumIterations + "\n");
402	}
403
404	return text.toString();
405	}
406
407	/**
408	* Returns the revision string.
409	*
410	* @return the revision
411	*/
412	public String getRevision() {
413	return RevisionUtils.extract("$Revision: 1.16 $");
414	}
415
416	/**
417	* Main method for testing this class.
418	*
419	* @param argv the options
420	*/
421	public static void main(String [] argv) {
422	runClassifier(new MultiBoostAB(), argv);
423	}
424	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: