source: src/main/java/weka/classifiers/trees/J48graft.java @ 21

Last change on this file since 21 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 21.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    J48graft.java
19 *    Copyright (C) 2007 Geoff Webb & Janice Boughton
20 *    (adapted from code written by Eibe Frank).
21 */
22
23package weka.classifiers.trees;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.classifiers.Sourcable;
28import weka.classifiers.trees.j48.BinC45ModelSelection;
29import weka.classifiers.trees.j48.C45ModelSelection;
30import weka.classifiers.trees.j48.C45PruneableClassifierTreeG;
31import weka.classifiers.trees.j48.ClassifierTree;
32import weka.classifiers.trees.j48.ModelSelection;
33import weka.core.AdditionalMeasureProducer;
34import weka.core.Capabilities;
35import weka.core.Drawable;
36import weka.core.Instance;
37import weka.core.Instances;
38import weka.core.Matchable;
39import weka.core.Option;
40import weka.core.OptionHandler;
41import weka.core.RevisionUtils;
42import weka.core.Summarizable;
43import weka.core.TechnicalInformation;
44import weka.core.TechnicalInformationHandler;
45import weka.core.Utils;
46import weka.core.WeightedInstancesHandler;
47import weka.core.TechnicalInformation.Field;
48import weka.core.TechnicalInformation.Type;
49
50import java.util.Enumeration;
51import java.util.Vector;
52
53/**
54 <!-- globalinfo-start -->
55 * Class for generating a grafted (pruned or unpruned) C4.5 decision tree. For more information, see<br/>
56 * <br/>
57 * Geoff Webb: Decision Tree Grafting From the All-Tests-But-One Partition. In: , San Francisco, CA, 1999.
58 * <p/>
59 <!-- globalinfo-end -->
60 *
61 <!-- technical-bibtex-start -->
62 * BibTeX:
63 * <pre>
64 * &#64;inproceedings{Webb1999,
65 *    address = {San Francisco, CA},
66 *    author = {Geoff Webb},
67 *    publisher = {Morgan Kaufmann},
68 *    title = {Decision Tree Grafting From the All-Tests-But-One Partition},
69 *    year = {1999}
70 * }
71 * </pre>
72 * <p/>
73 <!-- technical-bibtex-end -->
74 *
75 <!-- options-start -->
76 * Valid options are: <p/>
77 *
78 * <pre> -U
79 *  Use unpruned tree.</pre>
80 *
81 * <pre> -C &lt;pruning confidence&gt;
82 *  Set confidence threshold for pruning.
83 *  (default 0.25)</pre>
84 *
85 * <pre> -M &lt;minimum number of instances&gt;
86 *  Set minimum number of instances per leaf.
87 *  (default 2)</pre>
88 *
89 * <pre> -B
90 *  Use binary splits only.</pre>
91 *
92 * <pre> -S
93 *  Don't perform subtree raising.</pre>
94 *
95 * <pre> -L
96 *  Do not clean up after the tree has been built.</pre>
97 *
98 * <pre> -A
99 *  Laplace smoothing for predicted probabilities.  (note: this option only affects initial tree; grafting process always uses laplace).</pre>
100 *
101 * <pre> -E
102 *  Relabel when grafting.</pre>
103 *
104 <!-- options-end -->
105 *
106 * @author Janice Boughton (jrbought@csse.monash.edu.au)
107 *  (based on J48.java written by Eibe Frank)
108 * @version $Revision: 6088 $
109 */
110public class J48graft 
111  extends AbstractClassifier
112  implements OptionHandler, Drawable, Matchable, Sourcable, 
113             WeightedInstancesHandler, Summarizable,
114             AdditionalMeasureProducer, TechnicalInformationHandler {
115
116  /** for serialization */
117  static final long serialVersionUID = 8823716098042427799L;
118
119  /** The decision tree */
120  private ClassifierTree m_root;
121
122  /** Unpruned tree? */
123  private boolean m_unpruned = false;
124
125  /** Confidence level */
126  private float m_CF = 0.25f;
127
128  /** Minimum number of instances */
129  private int m_minNumObj = 2;
130
131  /** Determines whether probabilities are smoothed using
132      Laplace correction when predictions are generated */
133  private boolean m_useLaplace = false;
134
135  /** Number of folds for reduced error pruning. */
136  private int m_numFolds = 3;
137
138  /** Binary splits on nominal attributes? */
139  private boolean m_binarySplits = false;
140
141  /** Subtree raising to be performed? */
142  private boolean m_subtreeRaising = true;
143
144  /** Cleanup after the tree has been built. */
145  private boolean m_noCleanup = false;
146
147  /** relabel instances when grafting */
148  private boolean m_relabel = false;
149
150  /**
151   * Returns a string describing classifier
152   * @return a description suitable for
153   * displaying in the explorer/experimenter gui
154   */
155  public String globalInfo() {
156    return  "Class for generating a grafted (pruned or unpruned) C4.5 "
157      + "decision tree. For more information, see\n\n"
158      + getTechnicalInformation().toString();
159  }
160
161  /**
162   * Returns an instance of a TechnicalInformation object, containing
163   * detailed information about the technical background of this class,
164   * e.g., paper reference or book this class is based on.
165   *
166   * @return the technical information about this class
167   */
168  public TechnicalInformation getTechnicalInformation() {
169    TechnicalInformation        result;
170
171    result = new TechnicalInformation(Type.INPROCEEDINGS);
172    result.setValue(Field.AUTHOR, "Geoff Webb");
173    result.setValue(Field.YEAR, "1999");
174    result.setValue(Field.TITLE, "Decision Tree Grafting From the All-Tests-But-One Partition");
175    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
176    result.setValue(Field.ADDRESS, "San Francisco, CA");
177
178    return result;
179  }
180
181  /**
182   * Returns default capabilities of the classifier.
183   *
184   * @return      the capabilities of this classifier
185   */
186  public Capabilities getCapabilities() {
187    Capabilities      result;
188
189    try {
190     result = new C45PruneableClassifierTreeG(null, !m_unpruned, m_CF, m_subtreeRaising, m_relabel, !m_noCleanup).getCapabilities();
191    }
192    catch (Exception e) {
193      result = new Capabilities(this);
194      result.disableAll();
195    }
196
197    result.setOwner(this);
198
199    return result;
200  }
201
202  /**
203   * Generates the classifier.
204   *
205   * @param instances the data to train the classifier with
206   * @throws Exception if classifier can't be built successfully
207   */
208  public void buildClassifier(Instances instances)
209       throws Exception {
210
211    ModelSelection modSelection;
212
213    if (m_binarySplits)
214      modSelection = new BinC45ModelSelection(m_minNumObj, instances, true);
215    else
216      modSelection = new C45ModelSelection(m_minNumObj, instances, true);
217      m_root = new C45PruneableClassifierTreeG(modSelection, 
218                              !m_unpruned, m_CF, m_subtreeRaising, 
219                               m_relabel, !m_noCleanup);
220    m_root.buildClassifier(instances);
221
222    if (m_binarySplits) {
223      ((BinC45ModelSelection)modSelection).cleanup();
224    } else {
225      ((C45ModelSelection)modSelection).cleanup();
226    }
227  }
228
229  /**
230   * Classifies an instance.
231   *
232   * @param instance the instance to classify
233   * @return the classification for the instance
234   * @throws Exception if instance can't be classified successfully
235   */
236  public double classifyInstance(Instance instance) throws Exception {
237
238    return m_root.classifyInstance(instance);
239  }
240
241  /**
242   * Returns class probabilities for an instance.
243   *
244   * @param instance the instance to calculate the class probabilities for
245   * @return the class probabilities
246   * @throws Exception if distribution can't be computed successfully
247   */
248  public final double [] distributionForInstance(Instance instance) 
249       throws Exception {
250
251    return m_root.distributionForInstance(instance, m_useLaplace);
252  }
253
254  /**
255   *  Returns the type of graph this classifier
256   *  represents.
257   *  @return Drawable.TREE
258   */   
259  public int graphType() {
260      return Drawable.TREE;
261  }
262
263  /**
264   * Returns graph describing the tree.
265   *
266   * @return the graph describing the tree
267   * @throws Exception if graph can't be computed
268   */
269  public String graph() throws Exception {
270
271    return m_root.graph();
272  }
273
274  /**
275   * Returns tree in prefix order.
276   *
277   * @return the tree in prefix order
278   * @throws Exception if something goes wrong
279   */
280  public String prefix() throws Exception {
281   
282    return m_root.prefix();
283  }
284
285
286  /**
287   * Returns tree as an if-then statement.
288   *
289   * @param className the name of the Java class
290   * @return the tree as a Java if-then type statement
291   * @throws Exception if something goes wrong
292   */
293  public String toSource(String className) throws Exception {
294
295    StringBuffer [] source = m_root.toSource(className);
296    return 
297    "class " + className + " {\n\n"
298    +"  public static double classify(Object [] i)\n"
299    +"    throws Exception {\n\n"
300    +"    double p = Double.NaN;\n"
301    + source[0]  // Assignment code
302    +"    return p;\n"
303    +"  }\n"
304    + source[1]  // Support code
305    +"}\n";
306  }
307
308  /**
309   * Returns an enumeration describing the available options.
310   *
311   * Valid options are: <p>
312   *
313   * -U <br>
314   * Use unpruned tree.<p>
315   *
316   * -C confidence <br>
317   * Set confidence threshold for pruning. (Default: 0.25) <p>
318   *
319   * -M number <br>
320   * Set minimum number of instances per leaf. (Default: 2) <p>
321   *
322   * -B <br>
323   * Use binary splits for nominal attributes. <p>
324   *
325   * -S <br>
326   * Don't perform subtree raising. <p>
327   *
328   * -L <br>
329   * Do not clean up after the tree has been built.
330   *
331   * -A <br>
332   * If set, Laplace smoothing is used for predicted probabilites.
333   *  (note: this option only affects initial tree; grafting process always uses laplace). <p>
334   *
335   * -E <br>
336   * Allow relabelling when grafting. <p>
337   *
338   * @return an enumeration of all the available options.
339   */
340  public Enumeration listOptions() {
341
342    Vector newVector = new Vector(9);
343
344    newVector.
345       addElement(new Option("\tUse unpruned tree.",
346                              "U", 0, "-U"));
347    newVector.
348       addElement(new Option("\tSet confidence threshold for pruning.\n" +
349                             "\t(default 0.25)",
350                             "C", 1, "-C <pruning confidence>"));
351    newVector.
352       addElement(new Option("\tSet minimum number of instances per leaf.\n" +
353                              "\t(default 2)",
354                              "M", 1, "-M <minimum number of instances>"));
355    newVector.
356       addElement(new Option("\tUse binary splits only.",
357                              "B", 0, "-B"));
358    newVector.
359       addElement(new Option("\tDon't perform subtree raising.",
360                              "S", 0, "-S"));
361    newVector.
362       addElement(new Option("\tDo not clean up after the tree has been built.",
363                              "L", 0, "-L"));
364    newVector.
365       addElement(new Option("\tLaplace smoothing for predicted probabilities.  (note: this option only affects initial tree; grafting process always uses laplace).",
366 
367                              "A", 0, "-A"));
368    newVector.
369       addElement(new Option("\tRelabel when grafting.",
370                             "E", 0, "-E"));
371    return newVector.elements();
372  }
373
374  /**
375   * Parses a given list of options.
376   *
377   <!-- options-start -->
378   * Valid options are: <p/>
379   *
380   * <pre> -U
381   *  Use unpruned tree.</pre>
382   *
383   * <pre> -C &lt;pruning confidence&gt;
384   *  Set confidence threshold for pruning.
385   *  (default 0.25)</pre>
386   *
387   * <pre> -M &lt;minimum number of instances&gt;
388   *  Set minimum number of instances per leaf.
389   *  (default 2)</pre>
390   *
391   * <pre> -B
392   *  Use binary splits only.</pre>
393   *
394   * <pre> -S
395   *  Don't perform subtree raising.</pre>
396   *
397   * <pre> -L
398   *  Do not clean up after the tree has been built.</pre>
399   *
400   * <pre> -A
401   *  Laplace smoothing for predicted probabilities.  (note: this option only affects initial tree; grafting process always uses laplace).</pre>
402   *
403   * <pre> -E
404   *  Relabel when grafting.</pre>
405   *
406   <!-- options-end -->
407   *
408   * @param options the list of options as an array of strings
409   * @throws Exception if an option is not supported
410   */
411  public void setOptions(String[] options) throws Exception {
412 
413    // Other options
414    String minNumString = Utils.getOption('M', options);
415    if (minNumString.length() != 0) {
416      m_minNumObj = Integer.parseInt(minNumString);
417    } else {
418      m_minNumObj = 2;
419    }
420    m_binarySplits = Utils.getFlag('B', options);
421    m_useLaplace = Utils.getFlag('A', options);
422
423    // Pruning options
424    m_unpruned = Utils.getFlag('U', options);
425    m_subtreeRaising = !Utils.getFlag('S', options);
426    m_noCleanup = Utils.getFlag('L', options);
427                if ((m_unpruned) && (!m_subtreeRaising)) {
428      throw new Exception("Subtree raising doesn't need to be unset for unpruned tree!");
429    }
430    m_relabel = Utils.getFlag('E', options);
431    String confidenceString = Utils.getOption('C', options);
432    if (confidenceString.length() != 0) {
433      if (m_unpruned) {
434        throw new Exception("Doesn't make sense to change confidence for unpruned "
435                            +"tree!");
436      } else {
437        m_CF = (new Float(confidenceString)).floatValue();
438        if ((m_CF <= 0) || (m_CF >= 1)) {
439          throw new Exception("Confidence has to be greater than zero and smaller " +
440                              "than one!");
441        }
442      }
443    } else {
444      m_CF = 0.25f;
445    }
446  }
447
448  /**
449   * Gets the current settings of the Classifier.
450   *
451   * @return an array of strings suitable for passing to setOptions
452   */
453  public String [] getOptions() {
454
455    String [] options = new String [10];
456    int current = 0;
457
458    if (m_noCleanup) {
459      options[current++] = "-L";
460    }
461    if (m_unpruned) {
462      options[current++] = "-U";
463    } else {
464      if (!m_subtreeRaising) {
465        options[current++] = "-S";
466      }
467      options[current++] = "-C"; options[current++] = "" + m_CF;
468    }
469    if (m_binarySplits) {
470      options[current++] = "-B";
471    }
472    options[current++] = "-M"; options[current++] = "" + m_minNumObj;
473    if (m_useLaplace) {
474      options[current++] = "-A";
475    }
476
477    if(m_relabel) {
478       options[current++] = "-E";
479    }
480
481    while (current < options.length) {
482      options[current++] = "";
483    }
484    return options;
485  }
486
487  /**
488   * Returns the tip text for this property
489   * @return tip text for this property suitable for
490   * displaying in the explorer/experimenter gui
491   */
492  public String useLaplaceTipText() {
493    return "Whether counts at leaves are smoothed based on Laplace.";
494  }
495
496  /**
497   * Get the value of useLaplace.
498   *
499   * @return Value of useLaplace.
500   */
501  public boolean getUseLaplace() {
502   
503    return m_useLaplace;
504  }
505 
506  /**
507   * Set the value of useLaplace.
508   *
509   * @param newuseLaplace Value to assign to useLaplace.
510   */
511  public void setUseLaplace(boolean newuseLaplace) {
512   
513    m_useLaplace = newuseLaplace;
514  }
515 
516  /**
517   * Returns a description of the classifier.
518   *
519   * @return a description of the classifier
520   */
521  public String toString() {
522
523    if (m_root == null) {
524      return "No classifier built";
525    }
526    if (m_unpruned)
527      return "J48graft unpruned tree\n------------------\n" + m_root.toString();
528    else
529      return "J48graft pruned tree\n------------------\n" + m_root.toString();
530  }
531
532  /**
533   * Returns a superconcise version of the model
534   *
535   * @return a summary of the model
536   */
537  public String toSummaryString() {
538
539    return "Number of leaves: " + m_root.numLeaves() + "\n"
540         + "Size of the tree: " + m_root.numNodes() + "\n";
541  }
542
543  /**
544   * Returns the size of the tree
545   * @return the size of the tree
546   */
547  public double measureTreeSize() {
548    return m_root.numNodes();
549  }
550
551  /**
552   * Returns the number of leaves
553   * @return the number of leaves
554   */
555  public double measureNumLeaves() {
556    return m_root.numLeaves();
557  }
558
559  /**
560   * Returns the number of rules (same as number of leaves)
561   * @return the number of rules
562   */
563  public double measureNumRules() {
564    return m_root.numLeaves();
565  }
566
567  /**
568   * Returns an enumeration of the additional measure names
569   * @return an enumeration of the measure names
570   */
571  public Enumeration enumerateMeasures() {
572    Vector newVector = new Vector(3);
573    newVector.addElement("measureTreeSize");
574    newVector.addElement("measureNumLeaves");
575    newVector.addElement("measureNumRules");
576    return newVector.elements();
577  }
578
579  /**
580   * Returns the value of the named measure
581   * @param additionalMeasureName the name of the measure to query for its value
582   * @return the value of the named measure
583   * @throws IllegalArgumentException if the named measure is not supported
584   */
585  public double getMeasure(String additionalMeasureName) {
586    if (additionalMeasureName.compareTo("measureNumRules") == 0) {
587      return measureNumRules();
588    } else if (additionalMeasureName.compareTo("measureTreeSize") == 0) {
589      return measureTreeSize();
590    } else if (additionalMeasureName.compareTo("measureNumLeaves") == 0) {
591      return measureNumLeaves();
592    } else {
593      throw new IllegalArgumentException(additionalMeasureName
594                          + " not supported (j48)");
595    }
596  }
597
598  /**
599   * Returns the tip text for this property
600   * @return tip text for this property suitable for
601   * displaying in the explorer/experimenter gui
602   */
603  public String unprunedTipText() {
604    return "Whether pruning is performed.";
605  }
606
607  /**
608   * Get the value of unpruned.
609   *
610   * @return Value of unpruned.
611   */
612  public boolean getUnpruned() {
613
614    return m_unpruned;
615  }
616
617  /**
618   * Set the value of unpruned.
619   * @param v  Value to assign to unpruned.
620   */
621  public void setUnpruned(boolean v) {
622
623    m_unpruned = v;
624  }
625
626  /**
627   * Returns the tip text for this property
628   * @return tip text for this property suitable for
629   * displaying in the explorer/experimenter gui
630   */
631  public String relabelTipText() {
632    return "Whether relabelling is allowed during grafting.";
633  }
634
635  /**
636   * Get the value of relabelling
637   *
638   * @return Value of relabelling.
639   */
640  public boolean getRelabel() {
641
642    return m_relabel;
643  }
644
645  /**
646   * Set the value of relabelling.
647   *
648   * @param v  Value to assign to relabelling flag.
649   */
650  public void setRelabel(boolean v) {
651    m_relabel = v;
652  }
653
654  /**
655   * Returns the tip text for this property
656   * @return tip text for this property suitable for
657   * displaying in the explorer/experimenter gui
658   */
659  public String confidenceFactorTipText() {
660    return "The confidence factor used for pruning (smaller values incur "
661      + "more pruning).";
662  }
663
664  /**
665   * Get the value of CF.
666   *
667   * @return Value of CF.
668   */
669  public float getConfidenceFactor() {
670   
671    return m_CF;
672  }
673 
674  /**
675   * Set the value of CF.
676   *
677   * @param v  Value to assign to CF.
678   */
679  public void setConfidenceFactor(float v) {
680   
681    m_CF = v;
682  }
683
684  /**
685   * Returns the tip text for this property
686   * @return tip text for this property suitable for
687   * displaying in the explorer/experimenter gui
688   */
689  public String minNumObjTipText() {
690    return "The minimum number of instances per leaf.";
691  }
692
693  /**
694   * Get the value of minNumObj.
695   *
696   * @return Value of minNumObj.
697   */
698  public int getMinNumObj() {
699   
700    return m_minNumObj;
701  }
702 
703  /**
704   * Set the value of minNumObj.
705   *
706   * @param v  Value to assign to minNumObj.
707   */
708  public void setMinNumObj(int v) {
709   
710    m_minNumObj = v;
711  }
712
713  /**
714   * Returns the tip text for this property
715   * @return tip text for this property suitable for
716   * displaying in the explorer/experimenter gui
717   */
718  public String binarySplitsTipText() {
719    return "Whether to use binary splits on nominal attributes when "
720      + "building the trees.";
721  }
722 
723  /**
724   * Get the value of binarySplits.
725   *
726   * @return Value of binarySplits.
727   */
728  public boolean getBinarySplits() {
729
730    return m_binarySplits;
731  }
732
733  /**
734   * Set the value of binarySplits.
735   *
736   * @param v  Value to assign to binarySplits.
737   */
738  public void setBinarySplits(boolean v) {
739   
740    m_binarySplits = v;
741  }
742 
743  /**
744   * Returns the tip text for this property
745   * @return tip text for this property suitable for
746   * displaying in the explorer/experimenter gui
747   */
748  public String subtreeRaisingTipText() {
749    return "Whether to consider the subtree raising operation when pruning.";
750  }
751 
752  /**
753   * Get the value of subtreeRaising.
754   *
755   * @return Value of subtreeRaising.
756   */
757  public boolean getSubtreeRaising() {
758   
759    return m_subtreeRaising;
760  }
761 
762  /**
763   * Set the value of subtreeRaising.
764   *
765   * @param v  Value to assign to subtreeRaising.
766   */
767  public void setSubtreeRaising(boolean v) {
768   
769    m_subtreeRaising = v;
770  }
771
772  /**
773   * Returns the tip text for this property
774   * @return tip text for this property suitable for
775   * displaying in the explorer/experimenter gui
776   */
777  public String saveInstanceDataTipText() {
778    return "Whether to save the training data for visualization.";
779  }
780
781  /**
782   * Check whether instance data is to be saved.
783   *
784   * @return true if instance data is saved
785   */
786  public boolean getSaveInstanceData() {
787   
788    return m_noCleanup;
789  }
790 
791  /**
792   * Set whether instance data is to be saved.
793   * @param v true if instance data is to be saved
794   */
795  public void setSaveInstanceData(boolean v) {
796
797    m_noCleanup = v;
798  }
799 
800  /**
801   * Returns the revision string.
802   *
803   * @return            the revision
804   */
805  public String getRevision() {
806    return RevisionUtils.extract("$Revision: 6088 $");
807  }
808 
809  /**
810   * Main method for testing this class
811   *
812   * @param argv the commandline options
813   */
814  public static void main(String [] argv){
815    runClassifier(new J48graft(), argv);
816  }
817}
818
Note: See TracBrowser for help on using the repository browser.