source: src/main/java/weka/classifiers/trees/m5/M5Base.java @ 19

Last change on this file since 19 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 17.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    M5Base.java
19 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.trees.m5;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.classifiers.functions.LinearRegression;
28import weka.core.AdditionalMeasureProducer;
29import weka.core.Capabilities;
30import weka.core.FastVector;
31import weka.core.Instance;
32import weka.core.Instances;
33import weka.core.Option;
34import weka.core.TechnicalInformation;
35import weka.core.TechnicalInformationHandler;
36import weka.core.Utils;
37import weka.core.TechnicalInformation.Field;
38import weka.core.TechnicalInformation.Type;
39import weka.filters.Filter;
40import weka.filters.supervised.attribute.NominalToBinary;
41import weka.filters.unsupervised.attribute.RemoveUseless;
42import weka.filters.unsupervised.attribute.ReplaceMissingValues;
43
44import java.util.Enumeration;
45import java.util.Random;
46import java.util.Vector;
47
48/**
49 * M5Base. Implements base routines
50 * for generating M5 Model trees and rules. <p>
51 *
52 * The original algorithm M5 was invented by Quinlan: <br/>
53 *
54 * Quinlan J. R. (1992). Learning with continuous classes. Proceedings of
55 * the Australian Joint Conference on Artificial Intelligence. 343--348.
56 * World Scientific, Singapore. <p/>
57 *
58 * Yong Wang made improvements and created M5': <br/>
59 *
60 * Wang, Y and Witten, I. H. (1997). Induction of model trees for
61 * predicting continuous classes. Proceedings of the poster papers of the
62 * European Conference on Machine Learning. University of Economics,
63 * Faculty of Informatics and Statistics, Prague. <p/>
64 *
65 * Valid options are:<p>
66 *
67 * -U <br>
68 * Use unsmoothed predictions. <p>
69 *
70 * -R <br>
71 * Build regression tree/rule rather than model tree/rule
72 *
73 * @author Mark Hall (mhall@cs.waikato.ac.nz)
74 * @version $Revision: 5928 $
75 */
76public abstract class M5Base 
77  extends AbstractClassifier
78  implements AdditionalMeasureProducer,
79             TechnicalInformationHandler {
80
81  /** for serialization */
82  private static final long serialVersionUID = -4022221950191647679L;
83
84  /**
85   * the instances covered by the tree/rules
86   */
87  private Instances m_instances;
88
89  /**
90   * the rule set
91   */
92  protected FastVector m_ruleSet;
93
94  /**
95   * generate a decision list instead of a single tree.
96   */
97  private boolean m_generateRules;
98
99  /**
100   * use unsmoothed predictions
101   */
102  private boolean m_unsmoothedPredictions;
103
104  /**
105   * filter to fill in missing values
106   */
107  private ReplaceMissingValues m_replaceMissing;
108
109  /**
110   * filter to convert nominal attributes to binary
111   */
112  private NominalToBinary m_nominalToBinary;
113 
114  /**
115   * for removing useless attributes
116   */
117  private RemoveUseless m_removeUseless;
118
119  /**
120   * Save instances at each node in an M5 tree for visualization purposes.
121   */
122  protected boolean m_saveInstances = false;
123
124  /**
125   * Make a regression tree/rule instead of a model tree/rule
126   */
127  protected boolean m_regressionTree;
128
129  /**
130   * Do not prune tree/rules
131   */
132  protected boolean m_useUnpruned = false;
133
134  /**
135   * The minimum number of instances to allow at a leaf node
136   */
137  protected double m_minNumInstances = 4;
138
139  /**
140   * Constructor
141   */
142  public M5Base() {
143    m_generateRules = false;
144    m_unsmoothedPredictions = false;
145    m_useUnpruned = false;
146    m_minNumInstances = 4;
147  }
148
149  /**
150   * returns information about the classifier
151   * @return a description suitable for
152   * displaying in the explorer/experimenter gui
153   */
154  public String globalInfo() {
155    return 
156        "M5Base. Implements base routines for generating M5 Model trees and " 
157      + "rules\n"
158      + "The original algorithm M5 was invented by R. Quinlan and Yong Wang "
159      + "made improvements.\n\n"
160      + "For more information see:\n\n"
161      + getTechnicalInformation().toString();
162  }
163
164  /**
165   * Returns an instance of a TechnicalInformation object, containing
166   * detailed information about the technical background of this class,
167   * e.g., paper reference or book this class is based on.
168   *
169   * @return the technical information about this class
170   */
171  public TechnicalInformation getTechnicalInformation() {
172    TechnicalInformation        result;
173    TechnicalInformation        additional;
174   
175    result = new TechnicalInformation(Type.INPROCEEDINGS);
176    result.setValue(Field.AUTHOR, "Ross J. Quinlan");
177    result.setValue(Field.TITLE, "Learning with Continuous Classes");
178    result.setValue(Field.BOOKTITLE, "5th Australian Joint Conference on Artificial Intelligence");
179    result.setValue(Field.YEAR, "1992");
180    result.setValue(Field.PAGES, "343-348");
181    result.setValue(Field.PUBLISHER, "World Scientific");
182    result.setValue(Field.ADDRESS, "Singapore");
183   
184    additional = result.add(Type.INPROCEEDINGS);
185    additional.setValue(Field.AUTHOR, "Y. Wang and I. H. Witten");
186    additional.setValue(Field.TITLE, "Induction of model trees for predicting continuous classes");
187    additional.setValue(Field.BOOKTITLE, "Poster papers of the 9th European Conference on Machine Learning");
188    additional.setValue(Field.YEAR, "1997");
189    additional.setValue(Field.PUBLISHER, "Springer");
190   
191    return result;
192  }
193
194  /**
195   * Returns an enumeration describing the available options
196   *
197   * @return an enumeration of all the available options
198   */
199  public Enumeration listOptions() {
200    Vector newVector = new Vector(4);
201
202    newVector.addElement(new Option("\tUse unpruned tree/rules", 
203                                    "N", 0, "-N"));
204
205    newVector.addElement(new Option("\tUse unsmoothed predictions", 
206                                    "U", 0, "-U"));
207
208    newVector.addElement(new Option("\tBuild regression tree/rule rather "
209                                    +"than a model tree/rule", 
210                                    "R", 0, "-R"));
211
212    newVector.addElement(new Option("\tSet minimum number of instances "
213                                    +"per leaf\n\t(default 4)",
214                                    "M",1,"-M <minimum number of instances>"));
215    return newVector.elements();
216  } 
217
218  /**
219   * Parses a given list of options. <p/>
220   *
221   * Valid options are:<p>
222   *
223   * -U <br>
224   * Use unsmoothed predictions. <p>
225   *
226   * -R <br>
227   * Build a regression tree rather than a model tree. <p>
228   *
229   * @param options the list of options as an array of strings
230   * @throws Exception if an option is not supported
231   */
232  public void setOptions(String[] options) throws Exception {
233    setUnpruned(Utils.getFlag('N', options));
234    setUseUnsmoothed(Utils.getFlag('U', options));
235    setBuildRegressionTree(Utils.getFlag('R', options));
236    String optionString = Utils.getOption('M', options);
237    if (optionString.length() != 0) {
238      setMinNumInstances((new Double(optionString)).doubleValue());
239    }
240    Utils.checkForRemainingOptions(options);
241  } 
242
243  /**
244   * Gets the current settings of the classifier.
245   *
246   * @return an array of strings suitable for passing to setOptions
247   */
248  public String[] getOptions() {
249    String[] options = new String[5];
250    int current = 0;
251
252    if (getUnpruned()) {
253      options[current++] = "-N";
254    }
255
256    if (getUseUnsmoothed()) {
257      options[current++] = "-U";
258    } 
259
260    if (getBuildRegressionTree()) {
261      options[current++] = "-R";
262    }
263
264    options[current++] = "-M"; 
265    options[current++] = ""+getMinNumInstances();
266
267    while (current < options.length) {
268      options[current++] = "";
269    } 
270    return options;
271  } 
272
273  /**
274   * Returns the tip text for this property
275   *
276   * @return            tip text for this property suitable for
277   *                    displaying in the explorer/experimenter gui
278   */
279  public String unprunedTipText() {
280    return "Whether unpruned tree/rules are to be generated.";
281  }
282
283  /**
284   * Use unpruned tree/rules
285   *
286   * @param unpruned true if unpruned tree/rules are to be generated
287   */
288  public void setUnpruned(boolean unpruned) {
289    m_useUnpruned = unpruned;
290  }
291
292  /**
293   * Get whether unpruned tree/rules are being generated
294   *
295   * @return true if unpruned tree/rules are to be generated
296   */
297  public boolean getUnpruned() {
298    return m_useUnpruned;
299  }
300
301  /**
302   * Returns the tip text for this property
303   *
304   * @return            tip text for this property suitable for
305   *                    displaying in the explorer/experimenter gui
306   */
307  public String generateRulesTipText() {
308    return "Whether to generate rules (decision list) rather than a tree.";
309  }
310
311  /**
312   * Generate rules (decision list) rather than a tree
313   *
314   * @param u true if rules are to be generated
315   */
316  protected void setGenerateRules(boolean u) {
317    m_generateRules = u;
318  } 
319
320  /**
321   * get whether rules are being generated rather than a tree
322   *
323   * @return true if rules are to be generated
324   */
325  protected boolean getGenerateRules() {
326    return m_generateRules;
327  } 
328
329  /**
330   * Returns the tip text for this property
331   *
332   * @return            tip text for this property suitable for
333   *                    displaying in the explorer/experimenter gui
334   */
335  public String useUnsmoothedTipText() {
336    return "Whether to use unsmoothed predictions.";
337  }
338
339  /**
340   * Use unsmoothed predictions
341   *
342   * @param s true if unsmoothed predictions are to be used
343   */
344  public void setUseUnsmoothed(boolean s) {
345    m_unsmoothedPredictions = s;
346  } 
347
348  /**
349   * Get whether or not smoothing is being used
350   *
351   * @return true if unsmoothed predictions are to be used
352   */
353  public boolean getUseUnsmoothed() {
354    return m_unsmoothedPredictions;
355  } 
356
357  /**
358   * Returns the tip text for this property
359   *
360   * @return            tip text for this property suitable for
361   *                    displaying in the explorer/experimenter gui
362   */
363  public String buildRegressionTreeTipText() {
364    return "Whether to generate a regression tree/rule instead of a model tree/rule.";
365  }
366
367  /**
368   * Get the value of regressionTree.
369   *
370   * @return Value of regressionTree.
371   */
372  public boolean getBuildRegressionTree() {
373   
374    return m_regressionTree;
375  }
376 
377  /**
378   * Set the value of regressionTree.
379   *
380   * @param newregressionTree Value to assign to regressionTree.
381   */
382  public void setBuildRegressionTree(boolean newregressionTree) {
383   
384    m_regressionTree = newregressionTree;
385  }
386
387  /**
388   * Returns the tip text for this property
389   *
390   * @return            tip text for this property suitable for
391   *                    displaying in the explorer/experimenter gui
392   */
393  public String minNumInstancesTipText() {
394    return "The minimum number of instances to allow at a leaf node.";
395  }
396
397  /**
398   * Set the minimum number of instances to allow at a leaf node
399   *
400   * @param minNum the minimum number of instances
401   */
402  public void setMinNumInstances(double minNum) {
403    m_minNumInstances = minNum;
404  }
405
406  /**
407   * Get the minimum number of instances to allow at a leaf node
408   *
409   * @return a <code>double</code> value
410   */
411  public double getMinNumInstances() {
412    return m_minNumInstances;
413  }
414
415  /**
416   * Returns default capabilities of the classifier, i.e., of LinearRegression.
417   *
418   * @return      the capabilities of this classifier
419   */
420  public Capabilities getCapabilities() {
421    return new LinearRegression().getCapabilities();
422  }
423
424  /**
425   * Generates the classifier.
426   *
427   * @param data set of instances serving as training data
428   * @throws Exception if the classifier has not been generated
429   * successfully
430   */
431  public void buildClassifier(Instances data) throws Exception {
432    // can classifier handle the data?
433    getCapabilities().testWithFail(data);
434
435    // remove instances with missing class
436    data = new Instances(data);
437    data.deleteWithMissingClass();
438   
439    m_instances = new Instances(data);
440
441    m_replaceMissing = new ReplaceMissingValues();
442    m_replaceMissing.setInputFormat(m_instances);
443    m_instances = Filter.useFilter(m_instances, m_replaceMissing);
444
445    m_nominalToBinary = new NominalToBinary();
446    m_nominalToBinary.setInputFormat(m_instances);
447    m_instances = Filter.useFilter(m_instances, m_nominalToBinary);
448
449    m_removeUseless = new RemoveUseless();
450    m_removeUseless.setInputFormat(m_instances);
451    m_instances = Filter.useFilter(m_instances, m_removeUseless);
452   
453    m_instances.randomize(new Random(1));
454
455    m_ruleSet = new FastVector();
456
457    Rule tempRule;
458
459    if (m_generateRules) {
460      Instances tempInst = m_instances;
461     
462      do {
463        tempRule = new Rule();
464        tempRule.setSmoothing(!m_unsmoothedPredictions);
465        tempRule.setRegressionTree(m_regressionTree);
466        tempRule.setUnpruned(m_useUnpruned);
467        tempRule.setSaveInstances(false);
468        tempRule.setMinNumInstances(m_minNumInstances);
469        tempRule.buildClassifier(tempInst);
470        m_ruleSet.addElement(tempRule);
471        //      System.err.println("Built rule : "+tempRule.toString());
472        tempInst = tempRule.notCoveredInstances();
473      } while (tempInst.numInstances() > 0);
474    } else {
475      // just build a single tree
476      tempRule = new Rule();
477
478      tempRule.setUseTree(true);
479      //      tempRule.setGrowFullTree(true);
480      tempRule.setSmoothing(!m_unsmoothedPredictions);
481      tempRule.setSaveInstances(m_saveInstances);
482      tempRule.setRegressionTree(m_regressionTree);
483      tempRule.setUnpruned(m_useUnpruned);
484      tempRule.setMinNumInstances(m_minNumInstances);
485
486      Instances temp_train;
487
488      temp_train = m_instances;
489
490      tempRule.buildClassifier(temp_train);
491
492      m_ruleSet.addElement(tempRule);     
493
494      // save space
495      m_instances = new Instances(m_instances, 0);
496      //      System.err.print(tempRule.m_topOfTree.treeToString(0));
497    } 
498  } 
499
500  /**
501   * Calculates a prediction for an instance using a set of rules
502   * or an M5 model tree
503   *
504   * @param inst the instance whos class value is to be predicted
505   * @return the prediction
506   * @throws Exception if a prediction can't be made.
507   */
508  public double classifyInstance(Instance inst) throws Exception {
509    Rule   temp;
510    double prediction = 0;
511    boolean success = false;
512
513    m_replaceMissing.input(inst);
514    inst = m_replaceMissing.output();
515    m_nominalToBinary.input(inst);
516    inst = m_nominalToBinary.output();
517    m_removeUseless.input(inst);
518    inst = m_removeUseless.output();
519
520    if (m_ruleSet == null) {
521      throw new Exception("Classifier has not been built yet!");
522    } 
523
524    if (!m_generateRules) {
525      temp = (Rule) m_ruleSet.elementAt(0);
526      return temp.classifyInstance(inst);
527    } 
528
529    boolean cont;
530    int     i;
531
532    for (i = 0; i < m_ruleSet.size(); i++) {
533      cont = false;
534      temp = (Rule) m_ruleSet.elementAt(i);
535
536      try {
537        prediction = temp.classifyInstance(inst);
538        success = true;
539      } catch (Exception e) {
540        cont = true;
541      } 
542
543      if (!cont) {
544        break;
545      } 
546    } 
547
548    if (!success) {
549      System.out.println("Error in predicting (DecList)");
550    } 
551    return prediction;
552  } 
553
554  /**
555   * Returns a description of the classifier
556   *
557   * @return a description of the classifier as a String
558   */
559  public String toString() {
560    StringBuffer text = new StringBuffer();
561    Rule         temp;
562
563    if (m_ruleSet == null) {
564      return "Classifier hasn't been built yet!";
565    } 
566
567    if (m_generateRules) {
568      text.append("M5 "
569                  + ((m_useUnpruned == true)
570                     ? "unpruned "
571                     : "pruned ")
572                  + ((m_regressionTree == true) 
573                     ?  "regression "
574                     : "model ")
575                  + "rules ");
576
577      if (!m_unsmoothedPredictions) {
578        text.append("\n(using smoothed linear models) ");
579      }
580
581      text.append(":\n");
582
583      text.append("Number of Rules : " + m_ruleSet.size() + "\n\n");
584
585      for (int j = 0; j < m_ruleSet.size(); j++) {
586        temp = (Rule) m_ruleSet.elementAt(j);
587
588        text.append("Rule: " + (j + 1) + "\n");
589        text.append(temp.toString());
590      } 
591    } else {
592      temp = (Rule) m_ruleSet.elementAt(0);
593      text.append(temp.toString());
594    } 
595    return text.toString();
596  } 
597
598  /**
599   * Returns an enumeration of the additional measure names
600   * @return an enumeration of the measure names
601   */
602  public Enumeration enumerateMeasures() {
603    Vector newVector = new Vector(1);
604    newVector.addElement("measureNumRules");
605    return newVector.elements();
606  }
607
608  /**
609   * Returns the value of the named measure
610   * @param additionalMeasureName the name of the measure to query for its value
611   * @return the value of the named measure
612   * @throws Exception if the named measure is not supported
613   */
614  public double getMeasure(String additionalMeasureName) 
615    {
616    if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) {
617      return measureNumRules();
618    } else {
619      throw new IllegalArgumentException(additionalMeasureName
620                                         + " not supported (M5)");
621    }
622  }
623
624  /**
625   * return the number of rules
626   * @return the number of rules (same as # linear models &
627   * # leaves in the tree)
628   */
629  public double measureNumRules() {
630    if (m_generateRules) {
631      return m_ruleSet.size();
632    }
633    return ((Rule)m_ruleSet.elementAt(0)).m_topOfTree.numberOfLinearModels();
634  }
635
636  public RuleNode getM5RootNode() {
637    Rule temp = (Rule) m_ruleSet.elementAt(0);
638    return temp.getM5RootNode();
639  }
640}
Note: See TracBrowser for help on using the repository browser.