source: src/main/java/weka/classifiers/meta/MultiBoostAB.java @ 21

Last change on this file since 21 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 13.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    MultiBoostAB.java
19 *
20 *    MultiBoosting is an extension to the highly successful AdaBoost
21 *    technique for forming decision committees. MultiBoosting can be
22 *    viewed as combining AdaBoost with wagging. It is able to harness
23 *    both AdaBoost's high bias and variance reduction with wagging's
24 *    superior variance reduction. Using C4.5 as the base learning
25 *    algorithm, Multi-boosting is demonstrated to produce decision
26 *    committees with lower error than either AdaBoost or wagging
27 *    significantly more often than the reverse over a large
28 *    representative cross-section of UCI data sets. It offers the
29 *    further advantage over AdaBoost of suiting parallel execution.
30 *   
31 *    For more info refer to :
32 <!-- technical-plaintext-start -->
33 * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
34 <!-- technical-plaintext-end -->
35 *
36 *    Originally based on AdaBoostM1.java
37 *   
38 *    http://www.cm.deakin.edu.au/webb
39 *
40 *    School of Computing and Mathematics
41 *    Deakin University
42 *    Geelong, Vic, 3217, Australia
43 *    Copyright (C) 2001 Deakin University
44 *
45 */
46
47package weka.classifiers.meta;
48
49import weka.core.Instances;
50import weka.core.Option;
51import weka.core.RevisionUtils;
52import weka.core.TechnicalInformation;
53import weka.core.TechnicalInformationHandler;
54import weka.core.Utils;
55import weka.core.TechnicalInformation.Field;
56import weka.core.TechnicalInformation.Type;
57
58import java.util.Enumeration;
59import java.util.Random;
60import java.util.Vector;
61
62/**
63 <!-- globalinfo-start -->
64 * Class for boosting a classifier using the MultiBoosting method.<br/>
65 * <br/>
66 * MultiBoosting is an extension to the highly successful AdaBoost technique for forming decision committees. MultiBoosting can be viewed as combining AdaBoost with wagging. It is able to harness both AdaBoost's high bias and variance reduction with wagging's superior variance reduction. Using C4.5 as the base learning algorithm, Multi-boosting is demonstrated to produce decision committees with lower error than either AdaBoost or wagging significantly more often than the reverse over a large representative cross-section of UCI data sets. It offers the further advantage over AdaBoost of suiting parallel execution.<br/>
67 * <br/>
68 * For more information, see<br/>
69 * <br/>
70 * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. Vol.40(No.2).
71 * <p/>
72 <!-- globalinfo-end -->
73 *
74 <!-- technical-bibtex-start -->
75 * BibTeX:
76 * <pre>
77 * &#64;article{Webb2000,
78 *    address = {Boston},
79 *    author = {Geoffrey I. Webb},
80 *    journal = {Machine Learning},
81 *    number = {No.2},
82 *    publisher = {Kluwer Academic Publishers},
83 *    title = {MultiBoosting: A Technique for Combining Boosting and Wagging},
84 *    volume = {Vol.40},
85 *    year = {2000}
86 * }
87 * </pre>
88 * <p/>
89 <!-- technical-bibtex-end -->
90 *
91 <!-- options-start -->
92 * Valid options are: <p/>
93 *
94 * <pre> -C &lt;num&gt;
95 *  Number of sub-committees. (Default 3)</pre>
96 *
97 * <pre> -P &lt;num&gt;
98 *  Percentage of weight mass to base training on.
99 *  (default 100, reduce to around 90 speed up)</pre>
100 *
101 * <pre> -Q
102 *  Use resampling for boosting.</pre>
103 *
104 * <pre> -S &lt;num&gt;
105 *  Random number seed.
106 *  (default 1)</pre>
107 *
108 * <pre> -I &lt;num&gt;
109 *  Number of iterations.
110 *  (default 10)</pre>
111 *
112 * <pre> -D
113 *  If set, classifier is run in debug mode and
114 *  may output additional info to the console</pre>
115 *
116 * <pre> -W
117 *  Full name of base classifier.
118 *  (default: weka.classifiers.trees.DecisionStump)</pre>
119 *
120 * <pre>
121 * Options specific to classifier weka.classifiers.trees.DecisionStump:
122 * </pre>
123 *
124 * <pre> -D
125 *  If set, classifier is run in debug mode and
126 *  may output additional info to the console</pre>
127 *
128 <!-- options-end -->
129 *
130 * Options after -- are passed to the designated classifier.<p>
131 *
132 * @author Shane Butler (sbutle@deakin.edu.au)
133 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
134 * @author Len Trigg (trigg@cs.waikato.ac.nz)
135 * @version $Revision: 1.16 $
136 */
137public class MultiBoostAB 
138  extends AdaBoostM1
139  implements TechnicalInformationHandler {
140
141  /** for serialization */
142  static final long serialVersionUID = -6681619178187935148L;
143 
144  /** The number of sub-committees to use */
145  protected int m_NumSubCmtys = 3;
146
147  /** Random number generator */
148  protected Random m_Random = null;
149   
150  /**
151   * Returns a string describing classifier
152   * @return a description suitable for
153   * displaying in the explorer/experimenter gui
154   */
155  public String globalInfo() {
156
157    return  "Class for boosting a classifier using the MultiBoosting method.\n\n"
158      + "MultiBoosting is an extension to the highly successful AdaBoost "
159      + "technique for forming decision committees. MultiBoosting can be "
160      + "viewed as combining AdaBoost with wagging. It is able to harness "
161      + "both AdaBoost's high bias and variance reduction with wagging's "
162      + "superior variance reduction. Using C4.5 as the base learning "
163      + "algorithm, Multi-boosting is demonstrated to produce decision "
164      + "committees with lower error than either AdaBoost or wagging "
165      + "significantly more often than the reverse over a large "
166      + "representative cross-section of UCI data sets. It offers the "
167      + "further advantage over AdaBoost of suiting parallel execution.\n\n"
168      + "For more information, see\n\n"
169      + getTechnicalInformation().toString();
170  }
171
172  /**
173   * Returns an instance of a TechnicalInformation object, containing
174   * detailed information about the technical background of this class,
175   * e.g., paper reference or book this class is based on.
176   *
177   * @return the technical information about this class
178   */
179  public TechnicalInformation getTechnicalInformation() {
180    TechnicalInformation        result;
181   
182    result = new TechnicalInformation(Type.ARTICLE);
183    result.setValue(Field.AUTHOR, "Geoffrey I. Webb");
184    result.setValue(Field.YEAR, "2000");
185    result.setValue(Field.TITLE, "MultiBoosting: A Technique for Combining Boosting and Wagging");
186    result.setValue(Field.JOURNAL, "Machine Learning");
187    result.setValue(Field.VOLUME, "Vol.40");
188    result.setValue(Field.NUMBER, "No.2");
189    result.setValue(Field.PUBLISHER, "Kluwer Academic Publishers");
190    result.setValue(Field.ADDRESS, "Boston");
191   
192    return result;
193  }
194
195  /**
196   * Returns an enumeration describing the available options
197   *
198   * @return an enumeration of all the available options
199   */
200  public Enumeration listOptions() {
201
202    Enumeration enu = super.listOptions();
203    Vector vec = new Vector(1);
204
205    vec.addElement(new Option(
206              "\tNumber of sub-committees. (Default 3)",
207              "C", 1, "-C <num>"));
208    while (enu.hasMoreElements()) {
209      vec.addElement(enu.nextElement());
210    }
211    return vec.elements();
212  }
213
214  /**
215   * Parses a given list of options. <p/>
216   *
217   <!-- options-start -->
218   * Valid options are: <p/>
219   *
220   * <pre> -C &lt;num&gt;
221   *  Number of sub-committees. (Default 3)</pre>
222   *
223   * <pre> -P &lt;num&gt;
224   *  Percentage of weight mass to base training on.
225   *  (default 100, reduce to around 90 speed up)</pre>
226   *
227   * <pre> -Q
228   *  Use resampling for boosting.</pre>
229   *
230   * <pre> -S &lt;num&gt;
231   *  Random number seed.
232   *  (default 1)</pre>
233   *
234   * <pre> -I &lt;num&gt;
235   *  Number of iterations.
236   *  (default 10)</pre>
237   *
238   * <pre> -D
239   *  If set, classifier is run in debug mode and
240   *  may output additional info to the console</pre>
241   *
242   * <pre> -W
243   *  Full name of base classifier.
244   *  (default: weka.classifiers.trees.DecisionStump)</pre>
245   *
246   * <pre>
247   * Options specific to classifier weka.classifiers.trees.DecisionStump:
248   * </pre>
249   *
250   * <pre> -D
251   *  If set, classifier is run in debug mode and
252   *  may output additional info to the console</pre>
253   *
254   <!-- options-end -->
255   *
256   * Options after -- are passed to the designated classifier.<p>
257   *
258   * @param options the list of options as an array of strings
259   * @throws Exception if an option is not supported
260   */
261  public void setOptions(String[] options) throws Exception {
262
263    String subcmtyString = Utils.getOption('C', options);
264    if (subcmtyString.length() != 0) {
265      setNumSubCmtys(Integer.parseInt(subcmtyString));
266    } else {
267      setNumSubCmtys(3);
268    }
269
270    super.setOptions(options);
271  }
272
273  /**
274   * Gets the current settings of the Classifier.
275   *
276   * @return an array of strings suitable for passing to setOptions
277   */
278  public String [] getOptions() {
279
280    String [] ops = super.getOptions();
281    String [] options = new String[ops.length + 2];
282    options[0] = "-C"; options[1] = "" + getNumSubCmtys();
283    System.arraycopy(ops, 0, options, 2, ops.length);
284    return options;
285  }
286
287  /**
288   * Returns the tip text for this property
289   * @return tip text for this property suitable for
290   * displaying in the explorer/experimenter gui
291   */
292  public String numSubCmtysTipText() {
293    return "Sets the (approximate) number of subcommittees.";
294  }
295
296
297  /**
298   * Set the number of sub committees to use
299   *
300   * @param subc the number of sub committees
301   */
302  public void setNumSubCmtys(int subc) {
303
304    m_NumSubCmtys = subc;
305  }
306
307  /**
308   * Get the number of sub committees to use
309   *
310   * @return the seed for resampling
311   */
312  public int getNumSubCmtys() {
313
314    return m_NumSubCmtys;
315  }
316
317  /**
318   * Method for building this classifier.
319   *
320   * @param training the data to train with
321   * @throws Exception if the training fails
322   */
323  public void buildClassifier(Instances training) throws Exception {
324
325    m_Random = new Random(m_Seed);
326
327    super.buildClassifier(training);
328
329    m_Random = null;
330  }
331
332  /**
333   * Sets the weights for the next iteration.
334   *
335   * @param training the data to train with
336   * @param reweight the reweighting factor
337   * @throws Exception in case of an error
338   */
339  protected void setWeights(Instances training, double reweight) 
340    throws Exception {
341
342    int subCmtySize = m_Classifiers.length / m_NumSubCmtys;
343
344    if ((m_NumIterationsPerformed + 1) % subCmtySize == 0) {
345
346      if (getDebug())
347        System.err.println(m_NumIterationsPerformed + " " + subCmtySize);
348
349      double oldSumOfWeights = training.sumOfWeights();
350
351      // Randomly set the weights of the training instances to the poisson distributon
352      for (int i = 0; i < training.numInstances(); i++) {
353        training.instance(i).setWeight( - Math.log((m_Random.nextDouble() * 9999) / 10000) );
354      }
355
356      // Renormailise weights
357      double sumProbs = training.sumOfWeights();
358      for (int i = 0; i < training.numInstances(); i++) {
359        training.instance(i).setWeight(training.instance(i).weight() * oldSumOfWeights / sumProbs);
360      }
361    } else {
362      super.setWeights(training, reweight);
363    }
364  }
365 
366  /**
367   * Returns description of the boosted classifier.
368   *
369   * @return description of the boosted classifier as a string
370   */
371  public String toString() {
372   
373    // only ZeroR model?
374    if (m_ZeroR != null) {
375      StringBuffer buf = new StringBuffer();
376      buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
377      buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
378      buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
379      buf.append(m_ZeroR.toString());
380      return buf.toString();
381    }
382   
383    StringBuffer text = new StringBuffer();
384   
385    if (m_NumIterations == 0) {
386      text.append("MultiBoostAB: No model built yet.\n");
387    } else if (m_NumIterations == 1) {
388      text.append("MultiBoostAB: No boosting possible, one classifier used!\n");
389      text.append(m_Classifiers[0].toString() + "\n");
390    } else {
391      text.append("MultiBoostAB: Base classifiers and their weights: \n\n");
392      for (int i = 0; i < m_NumIterations ; i++) {
393        if ( (m_Classifiers != null) && (m_Classifiers[i] != null) ) {
394          text.append(m_Classifiers[i].toString() + "\n\n");
395          text.append("Weight: " + Utils.roundDouble(m_Betas[i], 2) + "\n\n");
396        }
397        else {
398          text.append("not yet initialized!\n\n");
399        }
400      }
401      text.append("Number of performed Iterations: " + m_NumIterations + "\n");
402    }
403   
404    return text.toString();
405  }
406 
407  /**
408   * Returns the revision string.
409   *
410   * @return            the revision
411   */
412  public String getRevision() {
413    return RevisionUtils.extract("$Revision: 1.16 $");
414  }
415 
416  /**
417   * Main method for testing this class.
418   *
419   * @param argv the options
420   */
421  public static void main(String [] argv) {
422    runClassifier(new MultiBoostAB(), argv);
423  }
424}
Note: See TracBrowser for help on using the repository browser.