/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* REPTree.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees;
import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Sourcable;
import weka.classifiers.rules.ZeroR;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.ContingencyTables;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
* Fast decision tree learner. Builds a decision/regression tree using information gain/variance and prunes it using reduced-error pruning (with backfitting). Only sorts values for numeric attributes once. Missing values are dealt with by splitting the corresponding instances into pieces (i.e. as in C4.5).
*
*
* Valid options are:
*
* -M <minimum number of instances>
* Set minimum number of instances per leaf (default 2).
*
* -V <minimum variance for split>
* Set minimum numeric class variance proportion
* of train variance for split (default 1e-3).
*
* -N <number of folds>
* Number of folds for reduced error pruning (default 3).
*
* -S <seed>
* Seed for random data shuffling (default 1).
*
* -P
* No pruning.
*
* -L
* Maximum tree depth (default -1, no maximum)
*
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 5928 $
*/
public class REPTree
extends AbstractClassifier
implements OptionHandler, WeightedInstancesHandler, Drawable,
AdditionalMeasureProducer, Sourcable {
/** for serialization */
static final long serialVersionUID = -8562443428621539458L;
/** ZeroR model that is used if no attributes are present. */
protected ZeroR m_zeroR;
/**
* Returns a string describing the classifier.
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Fast decision tree learner. Builds a decision/regression tree using "
+ "information gain/variance and prunes it using reduced-error pruning "
+ "(with backfitting). Only sorts values for numeric attributes "
+ "once. Missing values are dealt with by splitting the corresponding "
+ "instances into pieces (i.e. as in C4.5).";
}
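// A minimal usage sketch (not part of this class; the file name and the
// DataSource loader are illustrative assumptions):
//
//   import weka.core.converters.ConverterUtils.DataSource;
//
//   Instances data = DataSource.read("weather.arff");
//   data.setClassIndex(data.numAttributes() - 1);
//   REPTree tree = new REPTree();
//   tree.setOptions(new String[] {"-M", "2", "-N", "3", "-S", "1"});
//   tree.buildClassifier(data);
//   System.out.println(tree);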
/** An inner class for building and storing the tree structure */
protected class Tree
implements Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = -1635481717888437935L;
/** The header information (for printing the tree). */
protected Instances m_Info = null;
/** The subtrees of this tree. */
protected Tree[] m_Successors;
/** The attribute to split on. */
protected int m_Attribute = -1;
/** The split point. */
protected double m_SplitPoint = Double.NaN;
/** The proportions of training instances going down each branch. */
protected double[] m_Prop = null;
/** Class probabilities from the training data in the nominal case.
Holds the mean in the numeric case. */
protected double[] m_ClassProbs = null;
/** The (unnormalized) class distribution in the nominal
case. Holds the sum of squared errors and the weight
in the numeric case. */
protected double[] m_Distribution = null;
/** Class distribution of the hold-out set at this node in the nominal
case. Straight sum of weights in the numeric case (i.e., the array
has only one element). */
protected double[] m_HoldOutDist = null;
/** The hold-out error of the node. The number of misclassified
instances in the nominal case, the sum of squared errors in the
numeric case. */
protected double m_HoldOutError = 0;
/**
* Computes class distribution of an instance using the tree.
*
* @param instance the instance to compute the distribution for
* @return the distribution
* @throws Exception if computation fails
*/
protected double[] distributionForInstance(Instance instance)
throws Exception {
double[] returnedDist = null;
if (m_Attribute > -1) {
// Node is not a leaf
if (instance.isMissing(m_Attribute)) {
// Value is missing
returnedDist = new double[m_Info.numClasses()];
// Split instance up
for (int i = 0; i < m_Successors.length; i++) {
double[] help =
m_Successors[i].distributionForInstance(instance);
if (help != null) {
for (int j = 0; j < help.length; j++) {
returnedDist[j] += m_Prop[i] * help[j];
}
}
}
} else if (m_Info.attribute(m_Attribute).isNominal()) {
// For nominal attributes
returnedDist = m_Successors[(int)instance.value(m_Attribute)].
distributionForInstance(instance);
} else {
// For numeric attributes
if (instance.value(m_Attribute) < m_SplitPoint) {
returnedDist =
m_Successors[0].distributionForInstance(instance);
} else {
returnedDist =
m_Successors[1].distributionForInstance(instance);
}
}
}
if ((m_Attribute == -1) || (returnedDist == null)) {
// Node is a leaf or successor is empty
return m_ClassProbs;
} else {
return returnedDist;
}
}
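// Worked example of the missing-value case above (numbers illustrative):
// with m_Prop = {0.75, 0.25} and successor distributions {0.8, 0.2} and
// {0.4, 0.6}, the returned distribution is
//   0.75 * {0.8, 0.2} + 0.25 * {0.4, 0.6} = {0.7, 0.3}.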
/**
* Returns a string containing java source code equivalent to the test
* made at this node. The instance being tested is called "i". This
* routine assumes that it is called in branching order, so the >= test
* (the last one) of a numeric split point can be emitted as just "true"
* (reaching it implies that the preceding less-than test failed).
*
* @param index index of the value tested
* @return a value of type 'String'
*/
public final String sourceExpression(int index) {
StringBuffer expr = null;
if (index < 0) {
return "i[" + m_Attribute + "] == null";
}
if (m_Info.attribute(m_Attribute).isNominal()) {
expr = new StringBuffer("i[");
expr.append(m_Attribute).append("]");
expr.append(".equals(\"").append(m_Info.attribute(m_Attribute)
.value(index)).append("\")");
} else {
expr = new StringBuffer("");
if (index == 0) {
expr.append("((Double)i[")
.append(m_Attribute).append("]).doubleValue() < ")
.append(m_SplitPoint);
} else {
expr.append("true");
}
}
return expr.toString();
}
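// Example expressions emitted by sourceExpression (attribute index and
// values are illustrative): index -1 yields "i[3] == null"; a nominal
// value "sunny" yields "i[3].equals(\"sunny\")"; the first branch of a
// numeric split at 75.0 yields "((Double)i[3]).doubleValue() < 75.0";
// the second branch yields "true".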
/**
* Returns source code for the tree as if-then statements. The
* class value is assigned to the variable "p", and the tested
* instance is assumed to be named "i". The results are returned as
* two StringBuffers: a section of code for the assignment of the
* class, and a section containing support code (e.g., other support
* methods).
*
* TODO: If the generated source code encounters a missing value
* for the evaluated attribute, it stops branching and uses the
* class distribution of the current node to decide the return value.
* This is unlike the behaviour of distributionForInstance().
*
* @param className the classname that this static classifier has
* @param parent parent node of the current node
* @return an array containing two stringbuffers, the first string containing
* assignment code, and the second containing source for support code.
* @throws Exception if something goes wrong
*/
public StringBuffer [] toSource(String className, Tree parent)
throws Exception {
StringBuffer [] result = new StringBuffer[2];
double[] currentProbs;
if(m_ClassProbs == null)
currentProbs = parent.m_ClassProbs;
else
currentProbs = m_ClassProbs;
long printID = nextID();
// Is this a leaf?
if (m_Attribute == -1) {
result[0] = new StringBuffer(" p = ");
if(m_Info.classAttribute().isNumeric())
result[0].append(currentProbs[0]);
else {
result[0].append(Utils.maxIndex(currentProbs));
}
result[0].append(";\n");
result[1] = new StringBuffer("");
} else {
StringBuffer text = new StringBuffer("");
StringBuffer atEnd = new StringBuffer("");
text.append(" static double N")
.append(Integer.toHexString(this.hashCode()) + printID)
.append("(Object []i) {\n")
.append(" double p = Double.NaN;\n");
text.append(" /* " + m_Info.attribute(m_Attribute).name() + " */\n");
// Missing attribute?
text.append(" if (" + this.sourceExpression(-1) + ") {\n")
.append(" p = ");
if(m_Info.classAttribute().isNumeric())
text.append(currentProbs[0] + ";\n");
else
text.append(Utils.maxIndex(currentProbs) + ";\n");
text.append(" } ");
// Branching of the tree
for (int i=0;i" +
"N" +
Integer.toHexString(m_Successors[i].hashCode()) +
" [label=\"");
if (m_Info.attribute(m_Attribute).isNumeric()) {
if (i == 0) {
text.append(" < " +
Utils.doubleToString(m_SplitPoint, 2));
} else {
text.append(" >= " +
Utils.doubleToString(m_SplitPoint, 2));
}
} else {
text.append(" = " + m_Info.attribute(m_Attribute).value(i));
}
text.append("\"]\n");
num = m_Successors[i].toGraph(text, num, this);
}
}
return num;
}
/**
* Outputs description of a leaf node.
*
* @param parent the parent of the node
* @return the description of the node
* @throws Exception if generation fails
*/
protected String leafString(Tree parent) throws Exception {
if (m_Info.classAttribute().isNumeric()) {
double classMean;
if (m_ClassProbs == null) {
classMean = parent.m_ClassProbs[0];
} else {
classMean = m_ClassProbs[0];
}
StringBuffer buffer = new StringBuffer();
buffer.append(" : " + Utils.doubleToString(classMean, 2));
double avgError = 0;
if (m_Distribution[1] > 0) {
avgError = m_Distribution[0] / m_Distribution[1];
}
buffer.append(" (" +
Utils.doubleToString(m_Distribution[1], 2) + "/" +
Utils.doubleToString(avgError, 2)
+ ")");
avgError = 0;
if (m_HoldOutDist[0] > 0) {
avgError = m_HoldOutError / m_HoldOutDist[0];
}
buffer.append(" [" +
Utils.doubleToString(m_HoldOutDist[0], 2) + "/" +
Utils.doubleToString(avgError, 2)
+ "]");
return buffer.toString();
} else {
int maxIndex;
if (m_ClassProbs == null) {
maxIndex = Utils.maxIndex(parent.m_ClassProbs);
} else {
maxIndex = Utils.maxIndex(m_ClassProbs);
}
return " : " + m_Info.classAttribute().value(maxIndex) +
" (" + Utils.doubleToString(Utils.sum(m_Distribution), 2) +
"/" +
Utils.doubleToString((Utils.sum(m_Distribution) -
m_Distribution[maxIndex]), 2) + ")" +
" [" + Utils.doubleToString(Utils.sum(m_HoldOutDist), 2) + "/" +
Utils.doubleToString((Utils.sum(m_HoldOutDist) -
m_HoldOutDist[maxIndex]), 2) + "]";
}
}
/**
* Recursively outputs the tree.
*
* @param level the current level
* @param parent the current parent
* @return the generated subtree
*/
protected String toString(int level, Tree parent) {
try {
StringBuffer text = new StringBuffer();
if (m_Attribute == -1) {
// Output leaf info
return leafString(parent);
} else if (m_Info.attribute(m_Attribute).isNominal()) {
// For nominal attributes
for (int i = 0; i < m_Successors.length; i++) {
text.append("\n");
for (int j = 0; j < level; j++) {
text.append("| ");
}
text.append(m_Info.attribute(m_Attribute).name() + " = " +
m_Info.attribute(m_Attribute).value(i));
text.append(m_Successors[i].toString(level + 1, this));
}
} else {
// For numeric attributes
text.append("\n");
for (int j = 0; j < level; j++) {
text.append("| ");
}
text.append(m_Info.attribute(m_Attribute).name() + " < " +
Utils.doubleToString(m_SplitPoint, 2));
text.append(m_Successors[0].toString(level + 1, this));
text.append("\n");
for (int j = 0; j < level; j++) {
text.append("| ");
}
text.append(m_Info.attribute(m_Attribute).name() + " >= " +
Utils.doubleToString(m_SplitPoint, 2));
text.append(m_Successors[1].toString(level + 1, this));
}
return text.toString();
} catch (Exception e) {
e.printStackTrace();
return "Decision tree: tree can't be printed";
}
}
/**
* Recursively generates a tree.
*
* @param sortedIndices the sorted indices of the instances
* @param weights the weights of the instances
* @param data the data to work with
* @param totalWeight the total weight of the instances
* @param classProbs the class probabilities
* @param header the header of the data
* @param minNum the minimum number of instances in a leaf
* @param minVariance the minimum variance required for a split
* @param depth the current depth of the tree
* @param maxDepth the maximum allowed depth of the tree
* @throws Exception if generation fails
*/
protected void buildTree(int[][] sortedIndices, double[][] weights,
Instances data, double totalWeight,
double[] classProbs, Instances header,
double minNum, double minVariance,
int depth, int maxDepth)
throws Exception {
// Store structure of dataset, set minimum number of instances
// and make space for potential info from pruning data
m_Info = header;
m_HoldOutDist = new double[data.numClasses()];
// Make leaf if there are no training instances
int helpIndex = 0;
if (data.classIndex() == 0) {
helpIndex = 1;
}
if (sortedIndices[helpIndex].length == 0) {
if (data.classAttribute().isNumeric()) {
m_Distribution = new double[2];
} else {
m_Distribution = new double[data.numClasses()];
}
m_ClassProbs = null;
return;
}
double priorVar = 0;
if (data.classAttribute().isNumeric()) {
// Compute prior variance
double totalSum = 0, totalSumSquared = 0, totalSumOfWeights = 0;
for (int i = 0; i < sortedIndices[helpIndex].length; i++) {
Instance inst = data.instance(sortedIndices[helpIndex][i]);
totalSum += inst.classValue() * weights[helpIndex][i];
totalSumSquared +=
inst.classValue() * inst.classValue() * weights[helpIndex][i];
totalSumOfWeights += weights[helpIndex][i];
}
priorVar = singleVariance(totalSum, totalSumSquared,
totalSumOfWeights);
}
// Check if node doesn't contain enough instances, is pure
// or the maximum tree depth is reached
m_ClassProbs = new double[classProbs.length];
System.arraycopy(classProbs, 0, m_ClassProbs, 0, classProbs.length);
if ((totalWeight < (2 * minNum)) ||
// Nominal case
(data.classAttribute().isNominal() &&
Utils.eq(m_ClassProbs[Utils.maxIndex(m_ClassProbs)],
Utils.sum(m_ClassProbs))) ||
// Numeric case
(data.classAttribute().isNumeric() &&
((priorVar / totalWeight) < minVariance)) ||
// Check tree depth
((m_MaxDepth >= 0) && (depth >= maxDepth))) {
// Make leaf
m_Attribute = -1;
if (data.classAttribute().isNominal()) {
// Nominal case
m_Distribution = new double[m_ClassProbs.length];
for (int i = 0; i < m_ClassProbs.length; i++) {
m_Distribution[i] = m_ClassProbs[i];
}
Utils.normalize(m_ClassProbs);
} else {
// Numeric case
m_Distribution = new double[2];
m_Distribution[0] = priorVar;
m_Distribution[1] = totalWeight;
}
return;
}
// Compute class distributions and value of splitting
// criterion for each attribute
double[] vals = new double[data.numAttributes()];
double[][][] dists = new double[data.numAttributes()][0][0];
double[][] props = new double[data.numAttributes()][0];
double[][] totalSubsetWeights = new double[data.numAttributes()][0];
double[] splits = new double[data.numAttributes()];
if (data.classAttribute().isNominal()) {
// Nominal case
for (int i = 0; i < data.numAttributes(); i++) {
if (i != data.classIndex()) {
splits[i] = distribution(props, dists, i, sortedIndices[i],
weights[i], totalSubsetWeights, data);
vals[i] = gain(dists[i], priorVal(dists[i]));
}
}
} else {
// Numeric case
for (int i = 0; i < data.numAttributes(); i++) {
if (i != data.classIndex()) {
splits[i] =
numericDistribution(props, dists, i, sortedIndices[i],
weights[i], totalSubsetWeights, data,
vals);
}
}
}
// Find best attribute
m_Attribute = Utils.maxIndex(vals);
int numAttVals = dists[m_Attribute].length;
// Check if there are at least two subsets with
// required minimum number of instances
int count = 0;
for (int i = 0; i < numAttVals; i++) {
if (totalSubsetWeights[m_Attribute][i] >= minNum) {
count++;
}
if (count > 1) {
break;
}
}
// Any useful split found?
if ((vals[m_Attribute] > 0) && (count > 1)) {
// Build subtrees
m_SplitPoint = splits[m_Attribute];
m_Prop = props[m_Attribute];
int[][][] subsetIndices =
new int[numAttVals][data.numAttributes()][0];
double[][][] subsetWeights =
new double[numAttVals][data.numAttributes()][0];
splitData(subsetIndices, subsetWeights, m_Attribute, m_SplitPoint,
sortedIndices, weights, data);
m_Successors = new Tree[numAttVals];
for (int i = 0; i < numAttVals; i++) {
m_Successors[i] = new Tree();
m_Successors[i].
buildTree(subsetIndices[i], subsetWeights[i],
data, totalSubsetWeights[m_Attribute][i],
dists[m_Attribute][i], header, minNum,
minVariance, depth + 1, maxDepth);
}
} else {
// Make leaf
m_Attribute = -1;
}
// Normalize class counts
if (data.classAttribute().isNominal()) {
m_Distribution = new double[m_ClassProbs.length];
for (int i = 0; i < m_ClassProbs.length; i++) {
m_Distribution[i] = m_ClassProbs[i];
}
Utils.normalize(m_ClassProbs);
} else {
m_Distribution = new double[2];
m_Distribution[0] = priorVar;
m_Distribution[1] = totalWeight;
}
}
/**
* Computes size of the tree.
*
* @return the number of nodes
*/
protected int numNodes() {
if (m_Attribute == -1) {
return 1;
} else {
int size = 1;
for (int i = 0; i < m_Successors.length; i++) {
size += m_Successors[i].numNodes();
}
return size;
}
}
/**
* Splits instances into subsets.
*
* @param subsetIndices the sorted indices in the subset
* @param subsetWeights the weights of the subset
* @param att the attribute index
* @param splitPoint the split point for numeric attributes
* @param sortedIndices the sorted indices of the whole set
* @param weights the weights of the whole set
* @param data the data to work with
* @throws Exception if something goes wrong
*/
protected void splitData(int[][][] subsetIndices,
double[][][] subsetWeights,
int att, double splitPoint,
int[][] sortedIndices, double[][] weights,
Instances data) throws Exception {
int j;
int[] num;
// For each attribute
for (int i = 0; i < data.numAttributes(); i++) {
if (i != data.classIndex()) {
if (data.attribute(att).isNominal()) {
// For nominal attributes
num = new int[data.attribute(att).numValues()];
for (int k = 0; k < num.length; k++) {
subsetIndices[k][i] = new int[sortedIndices[i].length];
subsetWeights[k][i] = new double[sortedIndices[i].length];
}
for (j = 0; j < sortedIndices[i].length; j++) {
Instance inst = data.instance(sortedIndices[i][j]);
if (inst.isMissing(att)) {
// Split instance up
for (int k = 0; k < num.length; k++) {
if (m_Prop[k] > 0) {
subsetIndices[k][i][num[k]] = sortedIndices[i][j];
subsetWeights[k][i][num[k]] =
m_Prop[k] * weights[i][j];
num[k]++;
}
}
} else {
int subset = (int)inst.value(att);
subsetIndices[subset][i][num[subset]] =
sortedIndices[i][j];
subsetWeights[subset][i][num[subset]] = weights[i][j];
num[subset]++;
}
}
} else {
// For numeric attributes
num = new int[2];
for (int k = 0; k < 2; k++) {
subsetIndices[k][i] = new int[sortedIndices[i].length];
subsetWeights[k][i] = new double[weights[i].length];
}
for (j = 0; j < sortedIndices[i].length; j++) {
Instance inst = data.instance(sortedIndices[i][j]);
if (inst.isMissing(att)) {
// Split instance up
for (int k = 0; k < num.length; k++) {
if (m_Prop[k] > 0) {
subsetIndices[k][i][num[k]] = sortedIndices[i][j];
subsetWeights[k][i][num[k]] =
m_Prop[k] * weights[i][j];
num[k]++;
}
}
} else {
int subset = (inst.value(att) < splitPoint) ? 0 : 1;
subsetIndices[subset][i][num[subset]] =
sortedIndices[i][j];
subsetWeights[subset][i][num[subset]] = weights[i][j];
num[subset]++;
}
}
}
// Trim arrays
for (int k = 0; k < num.length; k++) {
int[] copy = new int[num[k]];
System.arraycopy(subsetIndices[k][i], 0, copy, 0, num[k]);
subsetIndices[k][i] = copy;
double[] copyWeights = new double[num[k]];
System.arraycopy(subsetWeights[k][i], 0,
copyWeights, 0, num[k]);
subsetWeights[k][i] = copyWeights;
}
}
}
}
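// Worked example of the missing-value handling above (weights
// illustrative): with m_Prop = {0.6, 0.4}, an instance of weight 1.0
// that is missing the split attribute is placed in both subsets, with
// weight 0.6 in subset 0 and 0.4 in subset 1; an instance with a known
// value keeps its full weight in exactly one subset.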
/**
* Computes class distribution for an attribute.
*
* @param props array to be filled with the proportions of instances going down each branch
* @param dists array to be filled with the class distributions for each branch
* @param att the attribute index
* @param sortedIndices the sorted indices of the instances
* @param weights the weights of the instances
* @param subsetWeights the weights of the subset
* @param data the data to work with
* @return the split point
* @throws Exception if computation fails
*/
protected double distribution(double[][] props,
double[][][] dists, int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data)
throws Exception {
double splitPoint = Double.NaN;
Attribute attribute = data.attribute(att);
double[][] dist = null;
int i;
if (attribute.isNominal()) {
// For nominal attributes
dist = new double[attribute.numValues()][data.numClasses()];
for (i = 0; i < sortedIndices.length; i++) {
Instance inst = data.instance(sortedIndices[i]);
if (inst.isMissing(att)) {
break;
}
dist[(int)inst.value(att)][(int)inst.classValue()] += weights[i];
}
} else {
// For numeric attributes
double[][] currDist = new double[2][data.numClasses()];
dist = new double[2][data.numClasses()];
// Move all instances into second subset
for (int j = 0; j < sortedIndices.length; j++) {
Instance inst = data.instance(sortedIndices[j]);
if (inst.isMissing(att)) {
break;
}
currDist[1][(int)inst.classValue()] += weights[j];
}
double priorVal = priorVal(currDist);
System.arraycopy(currDist[1], 0, dist[1], 0, dist[1].length);
// Try all possible split points
double currSplit = data.instance(sortedIndices[0]).value(att);
double currVal, bestVal = -Double.MAX_VALUE;
for (i = 0; i < sortedIndices.length; i++) {
Instance inst = data.instance(sortedIndices[i]);
if (inst.isMissing(att)) {
break;
}
if (inst.value(att) > currSplit) {
currVal = gain(currDist, priorVal);
if (currVal > bestVal) {
bestVal = currVal;
splitPoint = (inst.value(att) + currSplit) / 2.0;
for (int j = 0; j < currDist.length; j++) {
System.arraycopy(currDist[j], 0, dist[j], 0,
dist[j].length);
}
}
}
currSplit = inst.value(att);
currDist[0][(int)inst.classValue()] += weights[i];
currDist[1][(int)inst.classValue()] -= weights[i];
}
}
// Compute weights
props[att] = new double[dist.length];
for (int k = 0; k < props[att].length; k++) {
props[att][k] = Utils.sum(dist[k]);
}
if (!(Utils.sum(props[att]) > 0)) {
for (int k = 0; k < props[att].length; k++) {
props[att][k] = 1.0 / (double)props[att].length;
}
} else {
Utils.normalize(props[att]);
}
// Distribute counts
while (i < sortedIndices.length) {
Instance inst = data.instance(sortedIndices[i]);
for (int j = 0; j < dist.length; j++) {
dist[j][(int)inst.classValue()] += props[att][j] * weights[i];
}
i++;
}
// Compute subset weights
subsetWeights[att] = new double[dist.length];
for (int j = 0; j < dist.length; j++) {
subsetWeights[att][j] += Utils.sum(dist[j]);
}
// Return distribution and split point
dists[att] = dist;
return splitPoint;
}
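// Note on the split point chosen above: candidate thresholds are the
// midpoints between consecutive distinct attribute values. E.g.
// (illustrative), if the best boundary lies between sorted values 2.0
// and 4.0, the stored split point is (2.0 + 4.0) / 2 = 3.0.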
/**
* Computes class distribution for an attribute.
*
* @param props array to be filled with the proportions of instances going down each branch
* @param dists array to be filled with the class distributions for each branch
* @param att the attribute index
* @param sortedIndices the sorted indices of the instances
* @param weights the weights of the instances
* @param subsetWeights the weights of the subset
* @param data the data to work with
* @param vals array to be filled with the values of the splitting criterion
* @return the split point
* @throws Exception if computation fails
*/
protected double numericDistribution(double[][] props,
double[][][] dists, int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data,
double[] vals)
throws Exception {
double splitPoint = Double.NaN;
Attribute attribute = data.attribute(att);
double[][] dist = null;
double[] sums = null;
double[] sumSquared = null;
double[] sumOfWeights = null;
double totalSum = 0, totalSumSquared = 0, totalSumOfWeights = 0;
int i;
if (attribute.isNominal()) {
// For nominal attributes
sums = new double[attribute.numValues()];
sumSquared = new double[attribute.numValues()];
sumOfWeights = new double[attribute.numValues()];
int attVal;
for (i = 0; i < sortedIndices.length; i++) {
Instance inst = data.instance(sortedIndices[i]);
if (inst.isMissing(att)) {
break;
}
attVal = (int)inst.value(att);
sums[attVal] += inst.classValue() * weights[i];
sumSquared[attVal] +=
inst.classValue() * inst.classValue() * weights[i];
sumOfWeights[attVal] += weights[i];
}
totalSum = Utils.sum(sums);
totalSumSquared = Utils.sum(sumSquared);
totalSumOfWeights = Utils.sum(sumOfWeights);
} else {
// For numeric attributes
sums = new double[2];
sumSquared = new double[2];
sumOfWeights = new double[2];
double[] currSums = new double[2];
double[] currSumSquared = new double[2];
double[] currSumOfWeights = new double[2];
// Move all instances into second subset
for (int j = 0; j < sortedIndices.length; j++) {
Instance inst = data.instance(sortedIndices[j]);
if (inst.isMissing(att)) {
break;
}
currSums[1] += inst.classValue() * weights[j];
currSumSquared[1] +=
inst.classValue() * inst.classValue() * weights[j];
currSumOfWeights[1] += weights[j];
}
totalSum = currSums[1];
totalSumSquared = currSumSquared[1];
totalSumOfWeights = currSumOfWeights[1];
sums[1] = currSums[1];
sumSquared[1] = currSumSquared[1];
sumOfWeights[1] = currSumOfWeights[1];
// Try all possible split points
double currSplit = data.instance(sortedIndices[0]).value(att);
double currVal, bestVal = Double.MAX_VALUE;
for (i = 0; i < sortedIndices.length; i++) {
Instance inst = data.instance(sortedIndices[i]);
if (inst.isMissing(att)) {
break;
}
if (inst.value(att) > currSplit) {
currVal = variance(currSums, currSumSquared, currSumOfWeights);
if (currVal < bestVal) {
bestVal = currVal;
splitPoint = (inst.value(att) + currSplit) / 2.0;
for (int j = 0; j < 2; j++) {
sums[j] = currSums[j];
sumSquared[j] = currSumSquared[j];
sumOfWeights[j] = currSumOfWeights[j];
}
}
}
currSplit = inst.value(att);
double classVal = inst.classValue() * weights[i];
double classValSquared = inst.classValue() * classVal;
currSums[0] += classVal;
currSumSquared[0] += classValSquared;
currSumOfWeights[0] += weights[i];
currSums[1] -= classVal;
currSumSquared[1] -= classValSquared;
currSumOfWeights[1] -= weights[i];
}
}
// Compute weights
props[att] = new double[sums.length];
for (int k = 0; k < props[att].length; k++) {
props[att][k] = sumOfWeights[k];
}
if (!(Utils.sum(props[att]) > 0)) {
for (int k = 0; k < props[att].length; k++) {
props[att][k] = 1.0 / (double)props[att].length;
}
} else {
Utils.normalize(props[att]);
}
// Distribute counts for missing values
while (i < sortedIndices.length) {
Instance inst = data.instance(sortedIndices[i]);
for (int j = 0; j < sums.length; j++) {
sums[j] += props[att][j] * inst.classValue() * weights[i];
sumSquared[j] += props[att][j] * inst.classValue() *
inst.classValue() * weights[i];
sumOfWeights[j] += props[att][j] * weights[i];
}
totalSum += inst.classValue() * weights[i];
totalSumSquared +=
inst.classValue() * inst.classValue() * weights[i];
totalSumOfWeights += weights[i];
i++;
}
// Compute final distribution
dist = new double[sums.length][data.numClasses()];
for (int j = 0; j < sums.length; j++) {
if (sumOfWeights[j] > 0) {
dist[j][0] = sums[j] / sumOfWeights[j];
} else {
dist[j][0] = totalSum / totalSumOfWeights;
}
}
// Compute variance gain
double priorVar =
singleVariance(totalSum, totalSumSquared, totalSumOfWeights);
double var = variance(sums, sumSquared, sumOfWeights);
double gain = priorVar - var;
// Return distribution and split point
subsetWeights[att] = sumOfWeights;
dists[att] = dist;
vals[att] = gain;
return splitPoint;
}
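// Worked example of the variance gain above (numbers illustrative): if
// the prior (unnormalized) variance at this node is 10.0 and the best
// binary split leaves subset variances summing to 6.5, the value stored
// in vals[att] is 10.0 - 6.5 = 3.5.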
/**
* Computes variance for subsets.
*
* @param s the sum of values for each subset
* @param sS the sum of squared values for each subset
* @param sumOfWeights the sum of weights for each subset
* @return the variance
*/
protected double variance(double[] s, double[] sS,
double[] sumOfWeights) {
double var = 0;
for (int i = 0; i < s.length; i++) {
if (sumOfWeights[i] > 0) {
var += singleVariance(s[i], sS[i], sumOfWeights[i]);
}
}
return var;
}
/**
* Computes the variance for a single set
*
* @param s the sum of values
* @param sS the sum of squared values
* @param weight the weight
* @return the variance
*/
protected double singleVariance(double s, double sS, double weight) {
return sS - ((s * s) / weight);
}
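// Worked example: for class values {1, 2, 3} with unit weights we get
// s = 6, sS = 14, weight = 3, so singleVariance returns
// 14 - (6 * 6) / 3 = 2, the sum of squared deviations from the mean 2
// (i.e., the variance is left unnormalized by the weight).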
/**
* Computes value of splitting criterion before split.
*
* @param dist the class distributions for each branch
* @return the splitting criterion
*/
protected double priorVal(double[][] dist) {
return ContingencyTables.entropyOverColumns(dist);
}
/**
* Computes value of splitting criterion after split.
*
* @param dist the class distributions for each branch
* @param priorVal the splitting criterion
* @return the gain after splitting
*/
protected double gain(double[][] dist, double priorVal) {
return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
}
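// Worked example (illustrative; assuming ContingencyTables measures
// entropy in bits): an even two-class distribution has priorVal = 1 bit,
// and a split that separates the classes perfectly has conditional
// entropy 0, giving the maximum gain of 1 bit.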
/**
* Prunes the tree using the hold-out data (bottom-up).
*
* @return the error
* @throws Exception if pruning fails for some reason
*/
protected double reducedErrorPrune() throws Exception {
// Is node leaf ?
if (m_Attribute == -1) {
return m_HoldOutError;
}
// Prune all sub trees
double errorTree = 0;
for (int i = 0; i < m_Successors.length; i++) {
errorTree += m_Successors[i].reducedErrorPrune();
}
// Replace sub tree with leaf if error doesn't get worse
if (errorTree >= m_HoldOutError) {
m_Attribute = -1;
m_Successors = null;
return m_HoldOutError;
} else {
return errorTree;
}
}
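// Example of the pruning decision above (counts illustrative): if the
// subtrees together misclassify 5 hold-out instances while this node as
// a leaf would misclassify only 4, the subtrees are discarded; a tie
// (4 vs. 4) also prunes, since errorTree >= m_HoldOutError favours the
// simpler tree.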
/**
* Inserts hold-out set into tree.
*
* @param data the data to insert
* @throws Exception if something goes wrong
*/
protected void insertHoldOutSet(Instances data) throws Exception {
for (int i = 0; i < data.numInstances(); i++) {
insertHoldOutInstance(data.instance(i), data.instance(i).weight(),
this);
}
}
/**
* Inserts an instance from the hold-out set into the tree.
*
* @param inst the instance to insert
* @param weight the weight of the instance
* @param parent the parent of the node
* @throws Exception if insertion fails
*/
protected void insertHoldOutInstance(Instance inst, double weight,
Tree parent) throws Exception {
// Insert instance into hold-out class distribution
if (inst.classAttribute().isNominal()) {
// Nominal case
m_HoldOutDist[(int)inst.classValue()] += weight;
int predictedClass = 0;
if (m_ClassProbs == null) {
predictedClass = Utils.maxIndex(parent.m_ClassProbs);
} else {
predictedClass = Utils.maxIndex(m_ClassProbs);
}
if (predictedClass != (int)inst.classValue()) {
m_HoldOutError += weight;
}
} else {
// Numeric case
m_HoldOutDist[0] += weight;
double diff = 0;
if (m_ClassProbs == null) {
diff = parent.m_ClassProbs[0] - inst.classValue();
} else {
diff = m_ClassProbs[0] - inst.classValue();
}
m_HoldOutError += diff * diff * weight;
}
// The process is recursive
if (m_Attribute != -1) {
// If node is not a leaf
if (inst.isMissing(m_Attribute)) {
// Distribute instance
for (int i = 0; i < m_Successors.length; i++) {
if (m_Prop[i] > 0) {
m_Successors[i].insertHoldOutInstance(inst, weight *
m_Prop[i], this);
}
}
} else {
if (m_Info.attribute(m_Attribute).isNominal()) {
// Treat nominal attributes
m_Successors[(int)inst.value(m_Attribute)].
insertHoldOutInstance(inst, weight, this);
} else {
// Treat numeric attributes
if (inst.value(m_Attribute) < m_SplitPoint) {
m_Successors[0].insertHoldOutInstance(inst, weight, this);
} else {
m_Successors[1].insertHoldOutInstance(inst, weight, this);
}
}
}
}
}
/**
* Backfits the hold-out set into the tree.
*
* @param data the data to insert
* @throws Exception if insertion fails
*/
protected void backfitHoldOutSet(Instances data) throws Exception {
for (int i = 0; i < data.numInstances(); i++) {
backfitHoldOutInstance(data.instance(i), data.instance(i).weight(),
this);
}
}
/**
* Backfits an instance from the hold-out set into the tree.
*
* @param inst the instance to insert
* @param weight the weight of the instance
* @param parent the parent node
* @throws Exception if insertion fails
*/
protected void backfitHoldOutInstance(Instance inst, double weight,
Tree parent) throws Exception {
// Insert instance into hold-out class distribution
if (inst.classAttribute().isNominal()) {
// Nominal case
if (m_ClassProbs == null) {
m_ClassProbs = new double[inst.numClasses()];
}
System.arraycopy(m_Distribution, 0, m_ClassProbs, 0, inst.numClasses());
m_ClassProbs[(int)inst.classValue()] += weight;
Utils.normalize(m_ClassProbs);
} else {
// Numeric case
if (m_ClassProbs == null) {
m_ClassProbs = new double[1];
}
m_ClassProbs[0] *= m_Distribution[1];
m_ClassProbs[0] += weight * inst.classValue();
m_ClassProbs[0] /= (m_Distribution[1] + weight);
}
// The process is recursive
if (m_Attribute != -1) {
// If node is not a leaf
if (inst.isMissing(m_Attribute)) {
// Distribute instance
for (int i = 0; i < m_Successors.length; i++) {
if (m_Prop[i] > 0) {
m_Successors[i].backfitHoldOutInstance(inst, weight *
m_Prop[i], this);
}
}
} else {
if (m_Info.attribute(m_Attribute).isNominal()) {
// Treat nominal attributes
m_Successors[(int)inst.value(m_Attribute)].
backfitHoldOutInstance(inst, weight, this);
} else {
// Treat numeric attributes
if (inst.value(m_Attribute) < m_SplitPoint) {
m_Successors[0].backfitHoldOutInstance(inst, weight, this);
} else {
m_Successors[1].backfitHoldOutInstance(inst, weight, this);
}
}
}
}
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 5928 $");
}
}
/** The Tree object */
protected Tree m_Tree = null;
/** Number of folds for reduced error pruning. */
protected int m_NumFolds = 3;
/** Seed for random data shuffling. */
protected int m_Seed = 1;
/** Don't prune */
protected boolean m_NoPruning = false;
/** The minimum number of instances per leaf. */
protected double m_MinNum = 2;
/** The minimum proportion of the total variance (over all the data)
required for split. */
protected double m_MinVarianceProp = 1e-3;
/** Upper bound on the tree depth */
protected int m_MaxDepth = -1;
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String noPruningTipText() {
return "Whether pruning is performed.";
}
/**
* Get the value of NoPruning.
*
* @return Value of NoPruning.
*/
public boolean getNoPruning() {
return m_NoPruning;
}
/**
* Set the value of NoPruning.
*
* @param newNoPruning Value to assign to NoPruning.
*/
public void setNoPruning(boolean newNoPruning) {
m_NoPruning = newNoPruning;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String minNumTipText() {
return "The minimum total weight of the instances in a leaf.";
}
/**
* Get the value of MinNum.
*
* @return Value of MinNum.
*/
public double getMinNum() {
return m_MinNum;
}
/**
* Set the value of MinNum.
*
* @param newMinNum Value to assign to MinNum.
*/
public void setMinNum(double newMinNum) {
m_MinNum = newMinNum;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String minVariancePropTipText() {
return "The minimum proportion of the variance on all the data " +
"that needs to be present at a node in order for splitting to " +
"be performed in regression trees.";
}
/**
* Get the value of MinVarianceProp.
*
* @return Value of MinVarianceProp.
*/
public double getMinVarianceProp() {
return m_MinVarianceProp;
}
/**
* Set the value of MinVarianceProp.
*
* @param newMinVarianceProp Value to assign to MinVarianceProp.
*/
public void setMinVarianceProp(double newMinVarianceProp) {
m_MinVarianceProp = newMinVarianceProp;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String seedTipText() {
return "The seed used for randomizing the data.";
}
/**
* Get the value of Seed.
*
* @return Value of Seed.
*/
public int getSeed() {
return m_Seed;
}
/**
* Set the value of Seed.
*
* @param newSeed Value to assign to Seed.
*/
public void setSeed(int newSeed) {
m_Seed = newSeed;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String numFoldsTipText() {
return "Determines the amount of data used for pruning. One fold is used for "
+ "pruning, the rest for growing the rules.";
}
/**
* Get the value of NumFolds.
*
* @return Value of NumFolds.
*/
public int getNumFolds() {
return m_NumFolds;
}
/**
* Set the value of NumFolds.
*
* @param newNumFolds Value to assign to NumFolds.
*/
public void setNumFolds(int newNumFolds) {
m_NumFolds = newNumFolds;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String maxDepthTipText() {
return "The maximum tree depth (-1 for no restriction).";
}
/**
* Get the value of MaxDepth.
*
* @return Value of MaxDepth.
*/
public int getMaxDepth() {
return m_MaxDepth;
}
/**
* Set the value of MaxDepth.
*
* @param newMaxDepth Value to assign to MaxDepth.
*/
public void setMaxDepth(int newMaxDepth) {
m_MaxDepth = newMaxDepth;
}
/**
* Lists the command-line options for this classifier.
*
* @return an enumeration over all commandline options
*/
public Enumeration listOptions() {
Vector newVector = new Vector(6);
newVector.
addElement(new Option("\tSet minimum number of instances per leaf " +
"(default 2).",
"M", 1, "-M <minimum number of instances>"));
newVector.
addElement(new Option("\tSet minimum numeric class variance proportion\n" +
"\tof train variance for split (default 1e-3).",
"V", 1, "-V <minimum variance for split>"));
newVector.
addElement(new Option("\tNumber of folds for reduced error pruning " +
"(default 3).",
"N", 1, "-N <number of folds>"));
newVector.
addElement(new Option("\tSeed for random data shuffling (default 1).",
"S", 1, "-S <seed>"));
newVector.
addElement(new Option("\tNo pruning.",
"P", 0, "-P"));
newVector.
addElement(new Option("\tMaximum tree depth (default -1, no maximum)",
"L", 1, "-L"));
return newVector.elements();
}
/**
* Gets options from this classifier.
*
* @return the options for the current setup
*/
public String[] getOptions() {
String [] options = new String [12];
int current = 0;
options[current++] = "-M";
options[current++] = "" + (int)getMinNum();
options[current++] = "-V";
options[current++] = "" + getMinVarianceProp();
options[current++] = "-N";
options[current++] = "" + getNumFolds();
options[current++] = "-S";
options[current++] = "" + getSeed();
options[current++] = "-L";
options[current++] = "" + getMaxDepth();
if (getNoPruning()) {
options[current++] = "-P";
}
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -M <minimum number of instances>
* Set minimum number of instances per leaf (default 2).
*
* -V <minimum variance for split>
* Set minimum numeric class variance proportion
* of train variance for split (default 1e-3).
*
* -N <number of folds>
* Number of folds for reduced error pruning (default 3).
*
* -S <seed>
* Seed for random data shuffling (default 1).
*
* -P
* No pruning.
*
* -L
* Maximum tree depth (default -1, no maximum)
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String minNumString = Utils.getOption('M', options);
if (minNumString.length() != 0) {
m_MinNum = (double)Integer.parseInt(minNumString);
} else {
m_MinNum = 2;
}
String minVarString = Utils.getOption('V', options);
if (minVarString.length() != 0) {
m_MinVarianceProp = Double.parseDouble(minVarString);
} else {
m_MinVarianceProp = 1e-3;
}
String numFoldsString = Utils.getOption('N', options);
if (numFoldsString.length() != 0) {
m_NumFolds = Integer.parseInt(numFoldsString);
} else {
m_NumFolds = 3;
}
String seedString = Utils.getOption('S', options);
if (seedString.length() != 0) {
m_Seed = Integer.parseInt(seedString);
} else {
m_Seed = 1;
}
m_NoPruning = Utils.getFlag('P', options);
String depthString = Utils.getOption('L', options);
if (depthString.length() != 0) {
m_MaxDepth = Integer.parseInt(depthString);
} else {
m_MaxDepth = -1;
}
Utils.checkForRemainingOptions(options);
}
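// Command-line sketch (dataset path illustrative; -t is handled by the
// evaluation framework invoked via runClassifier, the remaining flags
// by setOptions above):
//
//   java weka.classifiers.trees.REPTree -t data/train.arff \
//       -M 2 -V 1e-3 -N 3 -S 1 -L 5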
/**
* Computes size of the tree.
*
* @return the number of nodes
*/
public int numNodes() {
return m_Tree.numNodes();
}
/**
* Returns an enumeration of the additional measure names.
*
* @return an enumeration of the measure names
*/
public Enumeration enumerateMeasures() {
Vector newVector = new Vector(1);
newVector.addElement("measureTreeSize");
return newVector.elements();
}
/**
* Returns the value of the named measure.
*
* @param additionalMeasureName the name of the measure to query for its value
* @return the value of the named measure
* @throws IllegalArgumentException if the named measure is not supported
*/
public double getMeasure(String additionalMeasureName) {
if (additionalMeasureName.equalsIgnoreCase("measureTreeSize")) {
return (double) numNodes();
}
else {
throw new IllegalArgumentException(additionalMeasureName
+ " not supported (REPTree)");
}
}
/**
* Returns default capabilities of the classifier.
*
* @return the capabilities of this classifier
*/
public Capabilities getCapabilities() {
Capabilities result = super.getCapabilities();
result.disableAll();
// attributes
result.enable(Capability.NOMINAL_ATTRIBUTES);
result.enable(Capability.NUMERIC_ATTRIBUTES);
result.enable(Capability.DATE_ATTRIBUTES);
result.enable(Capability.MISSING_VALUES);
// class
result.enable(Capability.NOMINAL_CLASS);
result.enable(Capability.NUMERIC_CLASS);
result.enable(Capability.DATE_CLASS);
result.enable(Capability.MISSING_CLASS_VALUES);
return result;
}
/**
* Builds classifier.
*
* @param data the data to train with
* @throws Exception if building fails
*/
public void buildClassifier(Instances data) throws Exception {
// can classifier handle the data?
getCapabilities().testWithFail(data);
// remove instances with missing class
data = new Instances(data);
data.deleteWithMissingClass();
Random random = new Random(m_Seed);
m_zeroR = null;
if (data.numAttributes() == 1) {
m_zeroR = new ZeroR();
m_zeroR.buildClassifier(data);
return;
}
// Randomize and stratify
data.randomize(random);
if (data.classAttribute().isNominal()) {
data.stratify(m_NumFolds);
}
// Split data into training and pruning set
Instances train = null;
Instances prune = null;
if (!m_NoPruning) {
train = data.trainCV(m_NumFolds, 0, random);
prune = data.testCV(m_NumFolds, 0);
} else {
train = data;
}
// Create array of sorted indices and weights
int[][] sortedIndices = new int[train.numAttributes()][0];
double[][] weights = new double[train.numAttributes()][0];
double[] vals = new double[train.numInstances()];
for (int j = 0; j < train.numAttributes(); j++) {
if (j != train.classIndex()) {
weights[j] = new double[train.numInstances()];
if (train.attribute(j).isNominal()) {
// Handling nominal attributes. Putting indices of
// instances with missing values at the end.
sortedIndices[j] = new int[train.numInstances()];
int count = 0;
for (int i = 0; i < train.numInstances(); i++) {
Instance inst = train.instance(i);
if (!inst.isMissing(j)) {
sortedIndices[j][count] = i;
weights[j][count] = inst.weight();
count++;
}
}
for (int i = 0; i < train.numInstances(); i++) {
Instance inst = train.instance(i);
if (inst.isMissing(j)) {
sortedIndices[j][count] = i;
weights[j][count] = inst.weight();
count++;
}
}
} else {
// Sorted indices are computed for numeric attributes
for (int i = 0; i < train.numInstances(); i++) {
Instance inst = train.instance(i);
vals[i] = inst.value(j);
}
sortedIndices[j] = Utils.sort(vals);
for (int i = 0; i < train.numInstances(); i++) {
weights[j][i] = train.instance(sortedIndices[j][i]).weight();
}
}
}
}
// Compute initial class counts
double[] classProbs = new double[train.numClasses()];
double totalWeight = 0, totalSumSquared = 0;
for (int i = 0; i < train.numInstances(); i++) {
Instance inst = train.instance(i);
if (data.classAttribute().isNominal()) {
classProbs[(int)inst.classValue()] += inst.weight();
totalWeight += inst.weight();
} else {
classProbs[0] += inst.classValue() * inst.weight();
totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
totalWeight += inst.weight();
}
}
m_Tree = new Tree();
double trainVariance = 0;
if (data.classAttribute().isNumeric()) {
trainVariance = m_Tree.
singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
classProbs[0] /= totalWeight;
}
// Build tree
m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs,
new Instances(train, 0), m_MinNum, m_MinVarianceProp *
trainVariance, 0, m_MaxDepth);
// Insert pruning data and perform reduced error pruning
if (!m_NoPruning) {
m_Tree.insertHoldOutSet(prune);
m_Tree.reducedErrorPrune();
m_Tree.backfitHoldOutSet(prune);
}
}
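// Example of the fold split used above: with the default m_NumFolds = 3,
// trainCV(3, 0, random) grows the tree on roughly two thirds of the
// data and testCV(3, 0) holds out the remaining third for reduced-error
// pruning and backfitting.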
/**
* Computes class distribution of an instance using the tree.
*
* @param instance the instance to compute the distribution for
* @return the computed class probabilities
* @throws Exception if computation fails
*/
public double[] distributionForInstance(Instance instance)
throws Exception {
if (m_zeroR != null) {
return m_zeroR.distributionForInstance(instance);
} else {
return m_Tree.distributionForInstance(instance);
}
}
/**
* For getting a unique ID when outputting the tree source
* (hashCode() isn't guaranteed to be unique).
*/
private static long PRINTED_NODES = 0;
/**
* Gets the next unique node ID.
*
* @return the next unique node ID.
*/
protected static long nextID() {
return PRINTED_NODES++;
}
/**
* resets the counter for the nodes
*/
protected static void resetID() {
PRINTED_NODES = 0;
}
/**
* Returns the tree as if-then statements.
*
* @param className the name for the generated class
* @return the tree as a Java if-then type statement
* @throws Exception if something goes wrong
*/
public String toSource(String className)
throws Exception {
if (m_Tree == null) {
throw new Exception("REPTree: No model built yet.");
}
StringBuffer [] source = m_Tree.toSource(className, m_Tree);
return
"class " + className + " {\n\n"
+" public static double classify(Object [] i)\n"
+" throws Exception {\n\n"
+" double p = Double.NaN;\n"
+ source[0] // Assignment code
+" return p;\n"
+" }\n"
+ source[1] // Support code
+"}\n";
}
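// Sketch of using the generated source (class name and values are
// illustrative; the emitted class must be compiled separately):
//
//   String src = tree.toSource("REPTreeModel");
//   // after compiling src:
//   // double p = REPTreeModel.classify(new Object[] {5.1, "sunny"});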
/**
* Returns the type of graph this classifier
* represents.
* @return Drawable.TREE
*/
public int graphType() {
return Drawable.TREE;
}
/**
* Outputs the decision tree as a graph
*
* @return the tree as a graph
* @throws Exception if generation fails
*/
public String graph() throws Exception {
if (m_Tree == null) {
throw new Exception("REPTree: No model built yet.");
}
StringBuffer resultBuff = new StringBuffer();
m_Tree.toGraph(resultBuff, 0, null);
String result = "digraph Tree {\n" + "edge [style=bold]\n" + resultBuff.toString()
+ "\n}\n";
return result;
}
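// The returned string is in GraphViz dot format and can be rendered
// with, e.g. (illustrative):
//
//   dot -Tpng reptree.dot -o reptree.png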
/**
* Outputs the decision tree.
*
* @return a string representation of the classifier
*/
public String toString() {
if (m_zeroR != null) {
return "No attributes other than class. Using ZeroR.\n\n" + m_zeroR.toString();
}
if (m_Tree == null) {
return "REPTree: No model built yet.";
}
return
"\nREPTree\n============\n" + m_Tree.toString(0, null) + "\n" +
"\nSize of the tree : " + numNodes();
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 5928 $");
}
/**
* Main method for this class.
*
* @param argv the commandline options
*/
public static void main(String[] argv) {
runClassifier(new REPTree(), argv);
}
}