/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* J48graft.java
* Copyright (C) 2007 Geoff Webb & Janice Boughton
* (adapted from code written by Eibe Frank).
*/
package weka.classifiers.trees;
import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Sourcable;
import weka.classifiers.trees.j48.BinC45ModelSelection;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45PruneableClassifierTreeG;
import weka.classifiers.trees.j48.ClassifierTree;
import weka.classifiers.trees.j48.ModelSelection;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Matchable;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.Enumeration;
import java.util.Vector;
/**
* Class for generating a grafted (pruned or unpruned) C4.5 decision tree. For more information, see
*
* Geoff Webb: Decision Tree Grafting From the All-Tests-But-One Partition. In: , San Francisco, CA, 1999.
*
* @inproceedings{Webb1999, * address = {San Francisco, CA}, * author = {Geoff Webb}, * publisher = {Morgan Kaufmann}, * title = {Decision Tree Grafting From the All-Tests-But-One Partition}, * year = {1999} * } ** * * Valid options are: * *
-U * Use unpruned tree.* *
-C <pruning confidence> * Set confidence threshold for pruning. * (default 0.25)* *
-M <minimum number of instances> * Set minimum number of instances per leaf. * (default 2)* *
-B * Use binary splits only.* *
-S * Don't perform subtree raising.* *
-L * Do not clean up after the tree has been built.* *
-A * Laplace smoothing for predicted probabilities. (note: this option only affects initial tree; grafting process always uses laplace).* *
-E * Relabel when grafting.* * * @author Janice Boughton (jrbought@csse.monash.edu.au) * (based on J48.java written by Eibe Frank) * @version $Revision: 6088 $ */ public class J48graft extends AbstractClassifier implements OptionHandler, Drawable, Matchable, Sourcable, WeightedInstancesHandler, Summarizable, AdditionalMeasureProducer, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 8823716098042427799L; /** The decision tree */ private ClassifierTree m_root; /** Unpruned tree? */ private boolean m_unpruned = false; /** Confidence level */ private float m_CF = 0.25f; /** Minimum number of instances */ private int m_minNumObj = 2; /** Determines whether probabilities are smoothed using Laplace correction when predictions are generated */ private boolean m_useLaplace = false; /** Number of folds for reduced error pruning. */ private int m_numFolds = 3; /** Binary splits on nominal attributes? */ private boolean m_binarySplits = false; /** Subtree raising to be performed? */ private boolean m_subtreeRaising = true; /** Cleanup after the tree has been built. */ private boolean m_noCleanup = false; /** relabel instances when grafting */ private boolean m_relabel = false; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for generating a grafted (pruned or unpruned) C4.5 " + "decision tree. For more information, see\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Geoff Webb"); result.setValue(Field.YEAR, "1999"); result.setValue(Field.TITLE, "Decision Tree Grafting From the All-Tests-But-One Partition"); result.setValue(Field.PUBLISHER, "Morgan Kaufmann"); result.setValue(Field.ADDRESS, "San Francisco, CA"); return result; } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result; try { result = new C45PruneableClassifierTreeG(null, !m_unpruned, m_CF, m_subtreeRaising, m_relabel, !m_noCleanup).getCapabilities(); } catch (Exception e) { result = new Capabilities(this); result.disableAll(); } result.setOwner(this); return result; } /** * Generates the classifier. * * @param instances the data to train the classifier with * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { ModelSelection modSelection; if (m_binarySplits) modSelection = new BinC45ModelSelection(m_minNumObj, instances, true); else modSelection = new C45ModelSelection(m_minNumObj, instances, true); m_root = new C45PruneableClassifierTreeG(modSelection, !m_unpruned, m_CF, m_subtreeRaising, m_relabel, !m_noCleanup); m_root.buildClassifier(instances); if (m_binarySplits) { ((BinC45ModelSelection)modSelection).cleanup(); } else { ((C45ModelSelection)modSelection).cleanup(); } } /** * Classifies an instance. * * @param instance the instance to classify * @return the classification for the instance * @throws Exception if instance can't be classified successfully */ public double classifyInstance(Instance instance) throws Exception { return m_root.classifyInstance(instance); } /** * Returns class probabilities for an instance. * * @param instance the instance to calculate the class probabilities for * @return the class probabilities * @throws Exception if distribution can't be computed successfully */ public final double [] distributionForInstance(Instance instance) throws Exception { return m_root.distributionForInstance(instance, m_useLaplace); } /** * Returns the type of graph this classifier * represents. * @return Drawable.TREE */ public int graphType() { return Drawable.TREE; } /** * Returns graph describing the tree. * * @return the graph describing the tree * @throws Exception if graph can't be computed */ public String graph() throws Exception { return m_root.graph(); } /** * Returns tree in prefix order. * * @return the tree in prefix order * @throws Exception if something goes wrong */ public String prefix() throws Exception { return m_root.prefix(); } /** * Returns tree as an if-then statement. * * @param className the name of the Java class * @return the tree as a Java if-then type statement * @throws Exception if something goes wrong */ public String toSource(String className) throws Exception { StringBuffer [] source = m_root.toSource(className); return "class " + className + " {\n\n" +" public static double classify(Object [] i)\n" +" throws Exception {\n\n" +" double p = Double.NaN;\n" + source[0] // Assignment code +" return p;\n" +" }\n" + source[1] // Support code +"}\n"; } /** * Returns an enumeration describing the available options. * * Valid options are:
*
* -U
* Use unpruned tree.
*
* -C confidence
* Set confidence threshold for pruning. (Default: 0.25)
*
* -M number
* Set minimum number of instances per leaf. (Default: 2)
*
* -B
* Use binary splits for nominal attributes.
*
* -S
* Don't perform subtree raising.
*
* -L
* Do not clean up after the tree has been built.
*
* -A
* If set, Laplace smoothing is used for predicted probabilites.
* (note: this option only affects initial tree; grafting process always uses laplace).
*
* -E
* Allow relabelling when grafting.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector(9);
newVector.
addElement(new Option("\tUse unpruned tree.",
"U", 0, "-U"));
newVector.
addElement(new Option("\tSet confidence threshold for pruning.\n" +
"\t(default 0.25)",
"C", 1, "-C -U
* Use unpruned tree.
*
* -C <pruning confidence>
* Set confidence threshold for pruning.
* (default 0.25)
*
* -M <minimum number of instances>
* Set minimum number of instances per leaf.
* (default 2)
*
* -B
* Use binary splits only.
*
* -S
* Don't perform subtree raising.
*
* -L
* Do not clean up after the tree has been built.
*
* -A
* Laplace smoothing for predicted probabilities. (note: this option only affects initial tree; grafting process always uses laplace).
*
* -E
* Relabel when grafting.
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
// Other options
String minNumString = Utils.getOption('M', options);
if (minNumString.length() != 0) {
m_minNumObj = Integer.parseInt(minNumString);
} else {
m_minNumObj = 2;
}
m_binarySplits = Utils.getFlag('B', options);
m_useLaplace = Utils.getFlag('A', options);
// Pruning options
m_unpruned = Utils.getFlag('U', options);
m_subtreeRaising = !Utils.getFlag('S', options);
m_noCleanup = Utils.getFlag('L', options);
if ((m_unpruned) && (!m_subtreeRaising)) {
throw new Exception("Subtree raising doesn't need to be unset for unpruned tree!");
}
m_relabel = Utils.getFlag('E', options);
String confidenceString = Utils.getOption('C', options);
if (confidenceString.length() != 0) {
if (m_unpruned) {
throw new Exception("Doesn't make sense to change confidence for unpruned "
+"tree!");
} else {
m_CF = (new Float(confidenceString)).floatValue();
if ((m_CF <= 0) || (m_CF >= 1)) {
throw new Exception("Confidence has to be greater than zero and smaller " +
"than one!");
}
}
} else {
m_CF = 0.25f;
}
}
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions() {
String [] options = new String [10];
int current = 0;
if (m_noCleanup) {
options[current++] = "-L";
}
if (m_unpruned) {
options[current++] = "-U";
} else {
if (!m_subtreeRaising) {
options[current++] = "-S";
}
options[current++] = "-C"; options[current++] = "" + m_CF;
}
if (m_binarySplits) {
options[current++] = "-B";
}
options[current++] = "-M"; options[current++] = "" + m_minNumObj;
if (m_useLaplace) {
options[current++] = "-A";
}
if(m_relabel) {
options[current++] = "-E";
}
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String useLaplaceTipText() {
return "Whether counts at leaves are smoothed based on Laplace.";
}
/**
* Get the value of useLaplace.
*
* @return Value of useLaplace.
*/
public boolean getUseLaplace() {
return m_useLaplace;
}
/**
* Set the value of useLaplace.
*
* @param newuseLaplace Value to assign to useLaplace.
*/
public void setUseLaplace(boolean newuseLaplace) {
m_useLaplace = newuseLaplace;
}
/**
* Returns a description of the classifier.
*
* @return a description of the classifier
*/
public String toString() {
if (m_root == null) {
return "No classifier built";
}
if (m_unpruned)
return "J48graft unpruned tree\n------------------\n" + m_root.toString();
else
return "J48graft pruned tree\n------------------\n" + m_root.toString();
}
/**
* Returns a superconcise version of the model
*
* @return a summary of the model
*/
public String toSummaryString() {
return "Number of leaves: " + m_root.numLeaves() + "\n"
+ "Size of the tree: " + m_root.numNodes() + "\n";
}
/**
* Returns the size of the tree
* @return the size of the tree
*/
public double measureTreeSize() {
return m_root.numNodes();
}
/**
* Returns the number of leaves
* @return the number of leaves
*/
public double measureNumLeaves() {
return m_root.numLeaves();
}
/**
* Returns the number of rules (same as number of leaves)
* @return the number of rules
*/
public double measureNumRules() {
return m_root.numLeaves();
}
/**
* Returns an enumeration of the additional measure names
* @return an enumeration of the measure names
*/
public Enumeration enumerateMeasures() {
Vector newVector = new Vector(3);
newVector.addElement("measureTreeSize");
newVector.addElement("measureNumLeaves");
newVector.addElement("measureNumRules");
return newVector.elements();
}
/**
* Returns the value of the named measure
* @param additionalMeasureName the name of the measure to query for its value
* @return the value of the named measure
* @throws IllegalArgumentException if the named measure is not supported
*/
public double getMeasure(String additionalMeasureName) {
if (additionalMeasureName.compareTo("measureNumRules") == 0) {
return measureNumRules();
} else if (additionalMeasureName.compareTo("measureTreeSize") == 0) {
return measureTreeSize();
} else if (additionalMeasureName.compareTo("measureNumLeaves") == 0) {
return measureNumLeaves();
} else {
throw new IllegalArgumentException(additionalMeasureName
+ " not supported (j48)");
}
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String unprunedTipText() {
return "Whether pruning is performed.";
}
/**
* Get the value of unpruned.
*
* @return Value of unpruned.
*/
public boolean getUnpruned() {
return m_unpruned;
}
/**
* Set the value of unpruned.
* @param v Value to assign to unpruned.
*/
public void setUnpruned(boolean v) {
m_unpruned = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String relabelTipText() {
return "Whether relabelling is allowed during grafting.";
}
/**
* Get the value of relabelling
*
* @return Value of relabelling.
*/
public boolean getRelabel() {
return m_relabel;
}
/**
* Set the value of relabelling.
*
* @param v Value to assign to relabelling flag.
*/
public void setRelabel(boolean v) {
m_relabel = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String confidenceFactorTipText() {
return "The confidence factor used for pruning (smaller values incur "
+ "more pruning).";
}
/**
* Get the value of CF.
*
* @return Value of CF.
*/
public float getConfidenceFactor() {
return m_CF;
}
/**
* Set the value of CF.
*
* @param v Value to assign to CF.
*/
public void setConfidenceFactor(float v) {
m_CF = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String minNumObjTipText() {
return "The minimum number of instances per leaf.";
}
/**
* Get the value of minNumObj.
*
* @return Value of minNumObj.
*/
public int getMinNumObj() {
return m_minNumObj;
}
/**
* Set the value of minNumObj.
*
* @param v Value to assign to minNumObj.
*/
public void setMinNumObj(int v) {
m_minNumObj = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String binarySplitsTipText() {
return "Whether to use binary splits on nominal attributes when "
+ "building the trees.";
}
/**
* Get the value of binarySplits.
*
* @return Value of binarySplits.
*/
public boolean getBinarySplits() {
return m_binarySplits;
}
/**
* Set the value of binarySplits.
*
* @param v Value to assign to binarySplits.
*/
public void setBinarySplits(boolean v) {
m_binarySplits = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String subtreeRaisingTipText() {
return "Whether to consider the subtree raising operation when pruning.";
}
/**
* Get the value of subtreeRaising.
*
* @return Value of subtreeRaising.
*/
public boolean getSubtreeRaising() {
return m_subtreeRaising;
}
/**
* Set the value of subtreeRaising.
*
* @param v Value to assign to subtreeRaising.
*/
public void setSubtreeRaising(boolean v) {
m_subtreeRaising = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String saveInstanceDataTipText() {
return "Whether to save the training data for visualization.";
}
/**
* Check whether instance data is to be saved.
*
* @return true if instance data is saved
*/
public boolean getSaveInstanceData() {
return m_noCleanup;
}
/**
* Set whether instance data is to be saved.
* @param v true if instance data is to be saved
*/
public void setSaveInstanceData(boolean v) {
m_noCleanup = v;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 6088 $");
}
/**
* Main method for testing this class
*
* @param argv the commandline options
*/
public static void main(String [] argv){
runClassifier(new J48graft(), argv);
}
}