source: src/main/java/weka/classifiers/meta/AdditiveRegression.java @ 18

Last change on this file since 18 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 15.4 KB
RevLine 
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AdditiveRegression.java
 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
 *
 */
22
23package weka.classifiers.meta;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.classifiers.IteratedSingleClassifierEnhancer;
28import weka.classifiers.rules.ZeroR;
29import weka.core.AdditionalMeasureProducer;
30import weka.core.Capabilities;
31import weka.core.Instance;
32import weka.core.Instances;
33import weka.core.Option;
34import weka.core.OptionHandler;
35import weka.core.RevisionUtils;
36import weka.core.TechnicalInformation;
37import weka.core.TechnicalInformationHandler;
38import weka.core.Utils;
39import weka.core.WeightedInstancesHandler;
40import weka.core.Capabilities.Capability;
41import weka.core.TechnicalInformation.Field;
42import weka.core.TechnicalInformation.Type;
43
44import java.util.Enumeration;
45import java.util.Vector;
46
/**
 <!-- globalinfo-start -->
 * Meta classifier that enhances the performance of a regression base classifier. Each iteration fits a model to the residuals left by the classifier on the previous iteration. Prediction is accomplished by adding the predictions of each classifier. Reducing the shrinkage (learning rate) parameter helps prevent overfitting and has a smoothing effect but increases the learning time.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * J.H. Friedman (1999). Stochastic Gradient Boosting.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;techreport{Friedman1999,
 *    author = {J.H. Friedman},
 *    institution = {Stanford University},
 *    title = {Stochastic Gradient Boosting},
 *    year = {1999},
 *    PS = {http://www-stat.stanford.edu/\~jhf/ftp/stobst.ps}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -S
 *  Specify shrinkage rate. (default = 1.0, ie. no shrinkage)
 * </pre>
 *
 * <pre> -I &lt;num&gt;
 *  Number of iterations.
 *  (default 10)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.trees.DecisionStump)</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.trees.DecisionStump:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 5928 $
 */
103public class AdditiveRegression 
104  extends IteratedSingleClassifierEnhancer
105  implements OptionHandler,
106             AdditionalMeasureProducer,
107             WeightedInstancesHandler,
108             TechnicalInformationHandler {
109
110  /** for serialization */
111  static final long serialVersionUID = -2368937577670527151L;
112 
113  /**
114   * Shrinkage (Learning rate). Default = no shrinkage.
115   */
116  protected double m_shrinkage = 1.0;
117
118  /** The number of successfully generated base classifiers. */
119  protected int m_NumIterationsPerformed;
120
121  /** The model for the mean */
122  protected ZeroR m_zeroR;
123
124  /** whether we have suitable data or nor (if not, ZeroR model is used) */
125  protected boolean m_SuitableData = true;
126 
127  /**
128   * Returns a string describing this attribute evaluator
129   * @return a description of the evaluator suitable for
130   * displaying in the explorer/experimenter gui
131   */
132  public String globalInfo() {
133    return " Meta classifier that enhances the performance of a regression "
134      +"base classifier. Each iteration fits a model to the residuals left "
135      +"by the classifier on the previous iteration. Prediction is "
136      +"accomplished by adding the predictions of each classifier. "
137      +"Reducing the shrinkage (learning rate) parameter helps prevent "
138      +"overfitting and has a smoothing effect but increases the learning "
139      +"time.\n\n"
140      +"For more information see:\n\n"
141      + getTechnicalInformation().toString();
142  }
143
144  /**
145   * Returns an instance of a TechnicalInformation object, containing
146   * detailed information about the technical background of this class,
147   * e.g., paper reference or book this class is based on.
148   *
149   * @return the technical information about this class
150   */
151  public TechnicalInformation getTechnicalInformation() {
152    TechnicalInformation        result;
153   
154    result = new TechnicalInformation(Type.TECHREPORT);
155    result.setValue(Field.AUTHOR, "J.H. Friedman");
156    result.setValue(Field.YEAR, "1999");
157    result.setValue(Field.TITLE, "Stochastic Gradient Boosting");
158    result.setValue(Field.INSTITUTION, "Stanford University");
159    result.setValue(Field.PS, "http://www-stat.stanford.edu/~jhf/ftp/stobst.ps");
160   
161    return result;
162  }
163
164  /**
165   * Default constructor specifying DecisionStump as the classifier
166   */
167  public AdditiveRegression() {
168
169    this(new weka.classifiers.trees.DecisionStump());
170  }
171
172  /**
173   * Constructor which takes base classifier as argument.
174   *
175   * @param classifier the base classifier to use
176   */
177  public AdditiveRegression(Classifier classifier) {
178
179    m_Classifier = classifier;
180  }
181
182  /**
183   * String describing default classifier.
184   *
185   * @return the default classifier classname
186   */
187  protected String defaultClassifierString() {
188   
189    return "weka.classifiers.trees.DecisionStump";
190  }
191
192  /**
193   * Returns an enumeration describing the available options.
194   *
195   * @return an enumeration of all the available options.
196   */
197  public Enumeration listOptions() {
198
199    Vector newVector = new Vector(4);
200
201    newVector.addElement(new Option(
202              "\tSpecify shrinkage rate. "
203              +"(default = 1.0, ie. no shrinkage)\n", 
204              "S", 1, "-S"));
205
206    Enumeration enu = super.listOptions();
207    while (enu.hasMoreElements()) {
208      newVector.addElement(enu.nextElement());
209    }
210    return newVector.elements();
211  }
212
213  /**
214   * Parses a given list of options. <p/>
215   *
216   <!-- options-start -->
217   * Valid options are: <p/>
218   *
219   * <pre> -S
220   *  Specify shrinkage rate. (default = 1.0, ie. no shrinkage)
221   * </pre>
222   *
223   * <pre> -I &lt;num&gt;
224   *  Number of iterations.
225   *  (default 10)</pre>
226   *
227   * <pre> -D
228   *  If set, classifier is run in debug mode and
229   *  may output additional info to the console</pre>
230   *
231   * <pre> -W
232   *  Full name of base classifier.
233   *  (default: weka.classifiers.trees.DecisionStump)</pre>
234   *
235   * <pre>
236   * Options specific to classifier weka.classifiers.trees.DecisionStump:
237   * </pre>
238   *
239   * <pre> -D
240   *  If set, classifier is run in debug mode and
241   *  may output additional info to the console</pre>
242   *
243   <!-- options-end -->
244   *
245   * @param options the list of options as an array of strings
246   * @throws Exception if an option is not supported
247   */
248  public void setOptions(String[] options) throws Exception {
249
250    String optionString = Utils.getOption('S', options);
251    if (optionString.length() != 0) {
252      Double temp = Double.valueOf(optionString);
253      setShrinkage(temp.doubleValue());
254    }
255
256    super.setOptions(options);
257  }
258
259  /**
260   * Gets the current settings of the Classifier.
261   *
262   * @return an array of strings suitable for passing to setOptions
263   */
264  public String [] getOptions() {
265   
266    String [] superOptions = super.getOptions();
267    String [] options = new String [superOptions.length + 2];
268    int current = 0;
269
270    options[current++] = "-S"; options[current++] = "" + getShrinkage();
271
272    System.arraycopy(superOptions, 0, options, current, 
273                     superOptions.length);
274
275    current += superOptions.length;
276    while (current < options.length) {
277      options[current++] = "";
278    }
279    return options;
280  }
281
282  /**
283   * Returns the tip text for this property
284   * @return tip text for this property suitable for
285   * displaying in the explorer/experimenter gui
286   */
287  public String shrinkageTipText() {
288    return "Shrinkage rate. Smaller values help prevent overfitting and "
289      + "have a smoothing effect (but increase learning time). "
290      +"Default = 1.0, ie. no shrinkage."; 
291  }
292
293  /**
294   * Set the shrinkage parameter
295   *
296   * @param l the shrinkage rate.
297   */
298  public void setShrinkage(double l) {
299    m_shrinkage = l;
300  }
301
302  /**
303   * Get the shrinkage rate.
304   *
305   * @return the value of the learning rate
306   */
307  public double getShrinkage() {
308    return m_shrinkage;
309  }
310
311  /**
312   * Returns default capabilities of the classifier.
313   *
314   * @return      the capabilities of this classifier
315   */
316  public Capabilities getCapabilities() {
317    Capabilities result = super.getCapabilities();
318
319    // class
320    result.disableAllClasses();
321    result.disableAllClassDependencies();
322    result.enable(Capability.NUMERIC_CLASS);
323    result.enable(Capability.DATE_CLASS);
324   
325    return result;
326  }
327
328  /**
329   * Build the classifier on the supplied data
330   *
331   * @param data the training data
332   * @throws Exception if the classifier could not be built successfully
333   */
334  public void buildClassifier(Instances data) throws Exception {
335
336    super.buildClassifier(data);
337
338    // can classifier handle the data?
339    getCapabilities().testWithFail(data);
340
341    // remove instances with missing class
342    Instances newData = new Instances(data);
343    newData.deleteWithMissingClass();
344
345    double sum = 0;
346    double temp_sum = 0;
347    // Add the model for the mean first
348    m_zeroR = new ZeroR();
349    m_zeroR.buildClassifier(newData);
350   
351    // only class? -> use only ZeroR model
352    if (newData.numAttributes() == 1) {
353      System.err.println(
354          "Cannot build model (only class attribute present in data!), "
355          + "using ZeroR model instead!");
356      m_SuitableData = false;
357      return;
358    }
359    else {
360      m_SuitableData = true;
361    }
362   
363    newData = residualReplace(newData, m_zeroR, false);
364    for (int i = 0; i < newData.numInstances(); i++) {
365      sum += newData.instance(i).weight() *
366        newData.instance(i).classValue() * newData.instance(i).classValue();
367    }
368    if (m_Debug) {
369      System.err.println("Sum of squared residuals "
370                         +"(predicting the mean) : " + sum);
371    }
372
373    m_NumIterationsPerformed = 0;
374    do {
375      temp_sum = sum;
376
377      // Build the classifier
378      m_Classifiers[m_NumIterationsPerformed].buildClassifier(newData);
379
380      newData = residualReplace(newData, m_Classifiers[m_NumIterationsPerformed], true);
381      sum = 0;
382      for (int i = 0; i < newData.numInstances(); i++) {
383        sum += newData.instance(i).weight() *
384          newData.instance(i).classValue() * newData.instance(i).classValue();
385      }
386      if (m_Debug) {
387        System.err.println("Sum of squared residuals : "+sum);
388      }
389      m_NumIterationsPerformed++;
390    } while (((temp_sum - sum) > Utils.SMALL) && 
391             (m_NumIterationsPerformed < m_Classifiers.length));
392  }
393
394  /**
395   * Classify an instance.
396   *
397   * @param inst the instance to predict
398   * @return a prediction for the instance
399   * @throws Exception if an error occurs
400   */
401  public double classifyInstance(Instance inst) throws Exception {
402
403    double prediction = m_zeroR.classifyInstance(inst);
404
405    // default model?
406    if (!m_SuitableData) {
407      return prediction;
408    }
409   
410    for (int i = 0; i < m_NumIterationsPerformed; i++) {
411      double toAdd = m_Classifiers[i].classifyInstance(inst);
412      toAdd *= getShrinkage();
413      prediction += toAdd;
414    }
415
416    return prediction;
417  }
418
419  /**
420   * Replace the class values of the instances from the current iteration
421   * with residuals ater predicting with the supplied classifier.
422   *
423   * @param data the instances to predict
424   * @param c the classifier to use
425   * @param useShrinkage whether shrinkage is to be applied to the model's output
426   * @return a new set of instances with class values replaced by residuals
427   * @throws Exception if something goes wrong
428   */
429  private Instances residualReplace(Instances data, Classifier c, 
430                                    boolean useShrinkage) throws Exception {
431    double pred,residual;
432    Instances newInst = new Instances(data);
433
434    for (int i = 0; i < newInst.numInstances(); i++) {
435      pred = c.classifyInstance(newInst.instance(i));
436      if (useShrinkage) {
437        pred *= getShrinkage();
438      }
439      residual = newInst.instance(i).classValue() - pred;
440      newInst.instance(i).setClassValue(residual);
441    }
442    //    System.err.print(newInst);
443    return newInst;
444  }
445
446  /**
447   * Returns an enumeration of the additional measure names
448   * @return an enumeration of the measure names
449   */
450  public Enumeration enumerateMeasures() {
451    Vector newVector = new Vector(1);
452    newVector.addElement("measureNumIterations");
453    return newVector.elements();
454  }
455
456  /**
457   * Returns the value of the named measure
458   * @param additionalMeasureName the name of the measure to query for its value
459   * @return the value of the named measure
460   * @throws IllegalArgumentException if the named measure is not supported
461   */
462  public double getMeasure(String additionalMeasureName) {
463    if (additionalMeasureName.compareToIgnoreCase("measureNumIterations") == 0) {
464      return measureNumIterations();
465    } else {
466      throw new IllegalArgumentException(additionalMeasureName
467                          + " not supported (AdditiveRegression)");
468    }
469  }
470
471  /**
472   * return the number of iterations (base classifiers) completed
473   * @return the number of iterations (same as number of base classifier
474   * models)
475   */
476  public double measureNumIterations() {
477    return m_NumIterationsPerformed;
478  }
479
480  /**
481   * Returns textual description of the classifier.
482   *
483   * @return a description of the classifier as a string
484   */
485  public String toString() {
486    StringBuffer text = new StringBuffer();
487
488    // only ZeroR model?
489    if (!m_SuitableData) {
490      StringBuffer buf = new StringBuffer();
491      buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
492      buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
493      buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
494      buf.append(m_zeroR.toString());
495      return buf.toString();
496    }
497   
498    if (m_NumIterations == 0) {
499      return "Classifier hasn't been built yet!";
500    }
501
502    text.append("Additive Regression\n\n");
503
504    text.append("ZeroR model\n\n" + m_zeroR + "\n\n");
505
506    text.append("Base classifier " 
507                + getClassifier().getClass().getName()
508                + "\n\n");
509    text.append("" + m_NumIterationsPerformed + " models generated.\n");
510
511    for (int i = 0; i < m_NumIterationsPerformed; i++) {
512      text.append("\nModel number " + i + "\n\n" +
513                  m_Classifiers[i] + "\n");
514    }
515
516    return text.toString();
517  }
518 
519  /**
520   * Returns the revision string.
521   *
522   * @return            the revision
523   */
524  public String getRevision() {
525    return RevisionUtils.extract("$Revision: 5928 $");
526  }
527
528  /**
529   * Main method for testing this class.
530   *
531   * @param argv should contain the following arguments:
532   * -t training file [-T test file] [-c class index]
533   */
534  public static void main(String [] argv) {
535    runClassifier(new AdditiveRegression(), argv);
536  }
537}
Note: See TracBrowser for help on using the repository browser.