/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Evaluation.java * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand * */ package weka.classifiers; import weka.classifiers.evaluation.NominalPrediction; import weka.classifiers.evaluation.NumericPrediction; import weka.classifiers.evaluation.ThresholdCurve; import weka.classifiers.evaluation.output.prediction.AbstractOutput; import weka.classifiers.evaluation.output.prediction.PlainText; import weka.classifiers.pmml.consumer.PMMLClassifier; import weka.classifiers.xml.XMLClassifier; import weka.core.Drawable; import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionHandler; import weka.core.RevisionUtils; import weka.core.Summarizable; import weka.core.Utils; import weka.core.Version; import weka.core.converters.ConverterUtils.DataSink; import weka.core.converters.ConverterUtils.DataSource; import weka.core.pmml.PMMLFactory; import weka.core.pmml.PMMLModel; import weka.core.xml.KOML; import weka.core.xml.XMLOptions; import weka.core.xml.XMLSerialization; import weka.estimators.UnivariateKernelEstimator; import java.beans.BeanInfo; import java.beans.Introspector; import java.beans.MethodDescriptor; import java.io.BufferedInputStream; import 
java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import java.io.Reader; import java.lang.reflect.Method; import java.util.Date; import java.util.Enumeration; import java.util.Random; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; /** * Class for evaluating machine learning models.
* * ------------------------------------------------------------------- * * General options when evaluating a learning scheme from the command-line: * * -t filename
* public static void main(String [] args) {
* runClassifier(new FunkyClassifier(), args);
* }
*
*
*
* ------------------------------------------------------------------
*
* Example usage from within an application:
*
* Instances trainInstances = ... instances got from somewhere
* Instances testInstances = ... instances got from somewhere
* Classifier scheme = ... scheme got from somewhere
*
* Evaluation evaluation = new Evaluation(trainInstances);
* evaluation.evaluateModel(scheme, testInstances);
* System.out.println(evaluation.toSummaryString());
*
*
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 6041 $
*/
public class Evaluation
implements Summarizable, RevisionHandler {
/** The number of classes. */
protected int m_NumClasses;
/** The number of folds for a cross-validation. */
protected int m_NumFolds;
/** The weight of all incorrectly classified instances. */
protected double m_Incorrect;
/** The weight of all correctly classified instances. */
protected double m_Correct;
/** The weight of all unclassified instances. */
protected double m_Unclassified;
/** The weight of all instances that had no class assigned to them. */
protected double m_MissingClass;
/** The weight of all instances that had a class assigned to them. */
protected double m_WithClass;
/** Array for storing the confusion matrix. */
protected double [][] m_ConfusionMatrix;
/** The names of the classes. */
protected String [] m_ClassNames;
/** Is the class nominal or numeric? */
protected boolean m_ClassIsNominal;
/** The prior probabilities of the classes. */
protected double [] m_ClassPriors;
/** The sum of counts for priors. */
protected double m_ClassPriorsSum;
/** The cost matrix (if given). */
protected CostMatrix m_CostMatrix;
/** The total cost of predictions (includes instance weights). */
protected double m_TotalCost;
/** Sum of errors. */
protected double m_SumErr;
/** Sum of absolute errors. */
protected double m_SumAbsErr;
/** Sum of squared errors. */
protected double m_SumSqrErr;
/** Sum of class values. */
protected double m_SumClass;
/** Sum of squared class values. */
protected double m_SumSqrClass;
/** Sum of predicted values. */
protected double m_SumPredicted;
/** Sum of squared predicted values. */
protected double m_SumSqrPredicted;
/** Sum of predicted * class values. */
protected double m_SumClassPredicted;
/** Sum of absolute errors of the prior. */
protected double m_SumPriorAbsErr;
/** Sum of squared errors of the prior. */
protected double m_SumPriorSqrErr;
/** Total Kononenko & Bratko Information. */
protected double m_SumKBInfo;
/** Resolution of the margin histogram. */
protected static int k_MarginResolution = 500;
/** Cumulative margin distribution. */
protected double m_MarginCounts [];
/** Number of non-missing class training instances seen. */
protected int m_NumTrainClassVals;
/** Array containing all numeric training class values seen. */
protected double [] m_TrainClassVals;
/** Array containing all numeric training class weights. */
protected double [] m_TrainClassWeights;
/** Numeric class estimator for prior. */
protected UnivariateKernelEstimator m_PriorEstimator;
/** Whether complexity statistics are available. */
protected boolean m_ComplexityStatisticsAvailable = true;
/**
 * The minimum probability accepted from an estimator to avoid
 * taking log(0) in Sf calculations.
 */
protected static final double MIN_SF_PROB = Double.MIN_VALUE;
/** Total entropy of prior predictions. */
protected double m_SumPriorEntropy;
/** Total entropy of scheme predictions. */
protected double m_SumSchemeEntropy;
/** Whether coverage statistics are available. */
protected boolean m_CoverageStatisticsAvailable = true;
/** The confidence level used for coverage statistics. */
protected double m_ConfLevel = 0.95;
/** Total size of predicted regions at the given confidence level. */
protected double m_TotalSizeOfRegions;
/** Total coverage of test cases at the given confidence level. */
protected double m_TotalCoverage;
/** Minimum target value. */
protected double m_MinTarget;
/** Maximum target value. */
protected double m_MaxTarget;
/** The list of predictions that have been generated (for computing AUC). */
protected FastVector m_Predictions;
/**
 * Enables/disables the use of priors, e.g., if no training set is
 * present in case of de-serialized schemes.
 */
protected boolean m_NoPriors = false;
/** The header of the training set. */
protected Instances m_Header;
/**
 * Creates an evaluation object with all counters reset and no cost matrix
 * (default costs).
 * Call <code>useNoPriors()</code> if the dataset is the test set and the
 * priors cannot be initialized from the training set via
 * <code>setPriors(Instances)</code>.
 *
 * @param data set of training instances, to get some header
 * information and prior class distribution information
 * @throws Exception if the class is not defined
 * @see #useNoPriors()
 * @see #setPriors(Instances)
 */
public Evaluation(Instances data) throws Exception {
  // Delegate to the cost-sensitive constructor without a cost matrix
  this(data, (CostMatrix) null);
}
/**
* Initializes all the counters for the evaluation and also takes a
* cost matrix as parameter.
* Use useNoPriors()
if the dataset is the test set and you
* can't initialize with the priors from the training set via
* setPriors(Instances)
.
*
* @param data set of training instances, to get some header
* information and prior class distribution information
* @param costMatrix the cost matrix---if null, default costs will be used
* @throws Exception if cost matrix is not compatible with
* data, the class is not defined or the class is numeric
* @see #useNoPriors()
* @see #setPriors(Instances)
*/
public Evaluation(Instances data, CostMatrix costMatrix)
throws Exception {
m_Header = new Instances(data, 0);
m_NumClasses = data.numClasses();
m_NumFolds = 1;
m_ClassIsNominal = data.classAttribute().isNominal();
if (m_ClassIsNominal) {
m_ConfusionMatrix = new double [m_NumClasses][m_NumClasses];
m_ClassNames = new String [m_NumClasses];
for(int i = 0; i < m_NumClasses; i++) {
m_ClassNames[i] = data.classAttribute().value(i);
}
}
m_CostMatrix = costMatrix;
if (m_CostMatrix != null) {
if (!m_ClassIsNominal) {
throw new Exception("Class has to be nominal if cost matrix given!");
}
if (m_CostMatrix.size() != m_NumClasses) {
throw new Exception("Cost matrix not compatible with data!");
}
}
m_ClassPriors = new double [m_NumClasses];
setPriors(data);
m_MarginCounts = new double [k_MarginResolution + 1];
}
/**
 * Gets the header (structure without instances) of the dataset this
 * evaluation was initialized with.
 *
 * @return the header information
 */
public Instances getHeader() {
  return this.m_Header;
}
/**
 * Returns the area under the ROC curve computed from the predictions that
 * have been recorded so far. Returns Utils.missingValue() if no
 * predictions have been recorded.
 *
 * @param classIndex the index of the class to consider as "positive"
 * @return the area under the ROC curve or not a number
 */
public double areaUnderROC(int classIndex) {
  // Without recorded predictions there is no curve to compute
  if (m_Predictions == null) {
    return Utils.missingValue();
  }
  Instances curve = new ThresholdCurve().getCurve(m_Predictions, classIndex);
  return ThresholdCurve.getROCArea(curve);
}
/**
 * Calculates the AUC averaged over all classes, each class weighted by
 * the sum of its row in the confusion matrix. Classes whose AUC is
 * missing contribute nothing to the numerator.
 *
 * @return the weighted AUC.
 */
public double weightedAreaUnderROC() {
  // Row totals of the confusion matrix serve as the class weights
  final double[] rowTotals = new double[m_NumClasses];
  double grandTotal = 0;
  for (int row = 0; row < m_NumClasses; row++) {
    for (int col = 0; col < m_NumClasses; col++) {
      rowTotals[row] += m_ConfusionMatrix[row][col];
    }
    grandTotal += rowTotals[row];
  }

  double weightedSum = 0;
  for (int cls = 0; cls < m_NumClasses; cls++) {
    final double auc = areaUnderROC(cls);
    if (Utils.isMissingValue(auc)) {
      continue;
    }
    weightedSum += (auc * rowTotals[cls]);
  }
  return weightedSum / grandTotal;
}
/**
 * Returns a copy of the confusion matrix, so that callers cannot modify
 * the matrix held by this object.
 *
 * @return a copy of the confusion matrix as a two-dimensional array
 */
public double[][] confusionMatrix() {
  final int rowCount = m_ConfusionMatrix.length;
  final double[][] duplicate = new double[rowCount][];
  for (int row = 0; row < rowCount; row++) {
    // Each row is duplicated individually to obtain a deep copy
    duplicate[row] = m_ConfusionMatrix[row].clone();
  }
  return duplicate;
}
/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier on a set of instances. A copy of the classifier is
 * made before each call to buildClassifier() (just in case the
 * classifier is not initialized properly).
 *
 * @param classifier the classifier with any options set.
 * @param data the data on which the cross-validation is to be
 * performed
 * @param numFolds the number of folds for the cross-validation
 * @param random random number generator for randomization
 * @param forPredictionsPrinting varargs parameter that, if supplied, is
 * expected to hold a weka.classifiers.evaluation.output.prediction.AbstractOutput
 * object
 * @throws Exception if a classifier could not be generated
 * successfully or the class is not defined
 */
public void crossValidateModel(Classifier classifier,
                               Instances data, int numFolds, Random random,
                               Object... forPredictionsPrinting)
  throws Exception {
  // Shuffle a private copy so the caller's dataset keeps its order
  final Instances shuffled = new Instances(data);
  shuffled.randomize(random);
  if (shuffled.classAttribute().isNominal()) {
    shuffled.stratify(numFolds);
  }

  // The first vararg, if present, is assumed to be an AbstractOutput
  AbstractOutput classificationOutput = null;
  if (forPredictionsPrinting.length > 0) {
    classificationOutput = (AbstractOutput) forPredictionsPrinting[0];
    // The header is printed once, before any fold is evaluated
    classificationOutput.setHeader(shuffled);
    classificationOutput.printHeader();
  }

  // Train on each training split and evaluate on the matching test split
  int fold = 0;
  while (fold < numFolds) {
    final Instances trainSplit = shuffled.trainCV(numFolds, fold, random);
    setPriors(trainSplit);
    final Classifier foldClassifier = AbstractClassifier.makeCopy(classifier);
    foldClassifier.buildClassifier(trainSplit);
    final Instances testSplit = shuffled.testCV(numFolds, fold);
    evaluateModel(foldClassifier, testSplit, forPredictionsPrinting);
    fold++;
  }
  m_NumFolds = numFolds;

  if (classificationOutput != null) {
    classificationOutput.printFooter();
  }
}
/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier, given by class name, on a set of instances.
 *
 * @param classifierString a string naming the class of the classifier
 * @param data the data on which the cross-validation is to be
 * performed
 * @param numFolds the number of folds for the cross-validation
 * @param options the options to the classifier. Any options
 * accepted by the classifier will be removed from this array.
 * @param random the random number generator for randomizing the data
 * @throws Exception if a classifier could not be generated
 * successfully or the class is not defined
 */
public void crossValidateModel(String classifierString,
                               Instances data, int numFolds,
                               String[] options, Random random)
  throws Exception {
  // Instantiate the named scheme, then reuse the Classifier-based variant
  final Classifier scheme = AbstractClassifier.forName(classifierString, options);
  crossValidateModel(scheme, data, numFolds, random);
}
/**
 * Reads a cost matrix from the named file. The file is first parsed in
 * the current cost matrix format; if that fails, it is re-opened and
 * parsed in the old format. If the old format fails as well, the
 * exception from the first attempt is thrown. The reader is always
 * closed before this method returns or throws.
 *
 * @param costFileName the name of the file containing the cost matrix
 * @param numClasses the number of classes, used to size the cost matrix
 * when reading the old format
 * @return a <code>CostMatrix</code> value, or null if costFileName is
 * null or empty
 * @throws Exception if the file cannot be opened or cannot be parsed in
 * either format
 */
protected static CostMatrix handleCostOption(String costFileName,
                                             int numClasses)
  throws Exception {
  if ((costFileName == null) || (costFileName.length() == 0)) {
    return null;
  }

  System.out.println(
    "NOTE: The behaviour of the -m option has changed between WEKA 3.0"
    +" and WEKA 3.1. -m now carries out cost-sensitive *evaluation*"
    +" only. For cost-sensitive *prediction*, use one of the"
    +" cost-sensitive metaschemes such as"
    +" weka.classifiers.meta.CostSensitiveClassifier or"
    +" weka.classifiers.meta.MetaCost");

  Reader costReader = null;
  try {
    try {
      costReader = new BufferedReader(new FileReader(costFileName));
    } catch (Exception e) {
      // Keep the original failure as the cause for diagnosis
      throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }
    try {
      // First try as a proper cost matrix format
      return new CostMatrix(costReader);
    } catch (Exception ex) {
      try {
        // Now try the old format on a freshly opened reader
        costReader.close(); // Close the old one
        costReader = new BufferedReader(new FileReader(costFileName));
        CostMatrix costMatrix = new CostMatrix(numClasses);
        costMatrix.readOldFormat(costReader);
        return costMatrix;
      } catch (Exception e2) {
        // The old format failed too: report the failure of the first attempt
        throw ex;
      }
    }
  } finally {
    // Release the file handle on every path; closing twice is harmless
    if (costReader != null) {
      try {
        costReader.close();
      } catch (java.io.IOException ignored) {
        // A failure to close must not mask the result or the real error
      }
    }
  }
}
/**
 * Evaluates the classifier on a given set of instances, recording each
 * prediction. Note that the data must have exactly the same format
 * (e.g. order of attributes) as the data used to train the classifier!
 * Otherwise the results will generally be meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @param forPredictionsPrinting varargs parameter that, if supplied, is
 * expected to hold a weka.classifiers.evaluation.output.prediction.AbstractOutput
 * object
 * @return the predictions
 * @throws Exception if model could not be evaluated
 * successfully
 */
public double[] evaluateModel(Classifier classifier,
                              Instances data,
                              Object... forPredictionsPrinting) throws Exception {
  final double[] predictions = new double[data.numInstances()];

  // Optional printer for the individual predictions
  AbstractOutput printer = null;
  if (forPredictionsPrinting.length > 0) {
    printer = (AbstractOutput) forPredictionsPrinting[0];
  }

  // Predictions are recorded so that they are available later (for AUC)
  for (int idx = 0; idx < data.numInstances(); idx++) {
    final Instance current = data.instance(idx);
    predictions[idx] = evaluateModelOnceAndRecordPrediction(classifier, current);
    if (printer != null) {
      printer.printClassification(classifier, current, idx);
    }
  }
  return predictions;
}
/**
 * Evaluates the supplied distribution on a single instance and updates
 * the statistics. For a nominal class the prediction is the index of the
 * largest entry of the distribution, or a missing value if that entry is
 * not positive; for a numeric class it is the first entry.
 *
 * @param dist the supplied distribution
 * @param instance the test instance to be classified
 * @param storePredictions whether to store the prediction
 * @return the prediction
 * @throws Exception if model could not be evaluated successfully
 */
public double evaluationForSingleInstance(double[] dist, Instance instance,
                                          boolean storePredictions) throws Exception {
  final double pred;
  if (m_ClassIsNominal) {
    final int best = Utils.maxIndex(dist);
    // No positive entry means that no class was predicted
    pred = (dist[best] <= 0) ? Utils.missingValue() : best;
    updateStatsForClassifier(dist, instance);
  } else {
    pred = dist[0];
    updateStatsForPredictor(pred, instance);
  }

  if (!storePredictions) {
    return pred;
  }

  // The prediction list is created lazily on first use
  if (m_Predictions == null) {
    m_Predictions = new FastVector();
  }
  if (m_ClassIsNominal) {
    m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist,
                                                   instance.weight()));
  } else {
    m_Predictions.addElement(new NumericPrediction(instance.classValue(), pred,
                                                   instance.weight()));
  }
  return pred;
}
/**
 * Evaluates the classifier on a single instance, optionally recording
 * the prediction. The classifier is given a copy of the instance whose
 * class value has been set to missing.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @param storePredictions whether to store the prediction
 * @return the prediction made by the classifier
 * @throws Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
protected double evaluationForSingleInstance(Classifier classifier,
                                             Instance instance,
                                             boolean storePredictions) throws Exception {
  // Hide the true class value from the classifier
  final Instance blinded = (Instance) instance.copy();
  blinded.setDataset(instance.dataset());
  blinded.setClassMissing();

  final double[] dist = classifier.distributionForInstance(blinded);
  final double pred = evaluationForSingleInstance(dist, instance, storePredictions);

  // Nothing more to do for a nominal class (entropy and coverage statistics
  // are always computed in that case), or when either the actual class
  // value or the prediction is missing.
  if (m_ClassIsNominal || instance.classIsMissing() || Utils.isMissingValue(pred)) {
    return pred;
  }

  if (classifier instanceof IntervalEstimator) {
    updateStatsForIntervalEstimator((IntervalEstimator) classifier, blinded,
                                    instance.classValue());
  } else {
    m_CoverageStatisticsAvailable = false;
  }

  if (classifier instanceof ConditionalDensityEstimator) {
    updateStatsForConditionalDensityEstimator((ConditionalDensityEstimator) classifier,
                                              blinded, instance.classValue());
  } else {
    m_ComplexityStatisticsAvailable = false;
  }
  return pred;
}
/**
 * Evaluates the classifier on a single instance and stores the
 * prediction.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier,
                                                   Instance instance) throws Exception {
  final boolean storePrediction = true;
  return evaluationForSingleInstance(classifier, instance, storePrediction);
}
/**
 * Evaluates the classifier on a single instance without storing the
 * prediction.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
public double evaluateModelOnce(Classifier classifier, Instance instance) throws Exception {
  final boolean storePrediction = false;
  return evaluationForSingleInstance(classifier, instance, storePrediction);
}
/**
 * Evaluates the supplied distribution on a single instance without
 * storing the prediction.
 *
 * @param dist the supplied distribution
 * @param instance the test instance to be classified
 * @return the prediction
 * @throws Exception if model could not be evaluated
 * successfully
 */
public double evaluateModelOnce(double [] dist, Instance instance) throws Exception {
  final boolean storePrediction = false;
  return evaluationForSingleInstance(dist, instance, storePrediction);
}
/**
 * Evaluates the supplied distribution on a single instance and stores
 * the prediction.
 *
 * @param dist the supplied distribution
 * @param instance the test instance to be classified
 * @return the prediction
 * @throws Exception if model could not be evaluated
 * successfully
 */
public double evaluateModelOnceAndRecordPrediction(double [] dist,
                                                   Instance instance) throws Exception {
  final boolean storePrediction = true;
  return evaluationForSingleInstance(dist, instance, storePrediction);
}
/**
 * Evaluates the supplied prediction on a single instance. The prediction
 * is first converted into a distribution via makeDistribution(double).
 *
 * @param prediction the supplied prediction
 * @param instance the test instance to be classified
 * @throws Exception if model could not be evaluated
 * successfully
 */
public void evaluateModelOnce(double prediction,
                              Instance instance) throws Exception {
  final double[] dist = makeDistribution(prediction);
  evaluateModelOnce(dist, instance);
}
/**
 * Gets the predictions that have been collected so far.
 *
 * @return a reference to the FastVector containing the predictions
 * that have been collected (not a copy). This is null if no predictions
 * have been collected.
 */
public FastVector predictions() {
  return this.m_Predictions;
}
/**
 * Wraps a static classifier in enough source to test using the weka
 * class libraries. The generated text declares a class named
 * WekaWrapper in package weka.classifiers that extends
 * AbstractClassifier and whose classifyInstance() delegates to the
 * static classify(Object[]) method of the class named className. The
 * source returned by classifier.toSource(className) is appended after
 * the wrapper class.
 *
 * @param classifier a Sourcable Classifier
 * @param className the name to give to the source code class
 * @return the source for a static classifier that can be tested with
 * weka libraries.
 * @throws Exception if code-generation fails
 */
public static String wekaStaticWrapper(Sourcable classifier, String className)
throws Exception {
StringBuffer result = new StringBuffer();
// Generate the classifier source first, so that a failure happens before
// any wrapper text is assembled
String staticClassifier = classifier.toSource(className);
// file header, package declaration and imports of the generated file
result.append("// Generated with Weka " + Version.VERSION + "\n");
result.append("//\n");
result.append("// This code is public domain and comes with no warranty.\n");
result.append("//\n");
result.append("// Timestamp: " + new Date() + "\n");
result.append("\n");
result.append("package weka.classifiers;\n");
result.append("\n");
result.append("import weka.core.Attribute;\n");
result.append("import weka.core.Capabilities;\n");
result.append("import weka.core.Capabilities.Capability;\n");
result.append("import weka.core.Instance;\n");
result.append("import weka.core.Instances;\n");
result.append("import weka.core.RevisionUtils;\n");
result.append("import weka.classifiers.Classifier;\nimport weka.classifiers.AbstractClassifier;\n");
result.append("\n");
result.append("public class WekaWrapper\n");
result.append(" extends AbstractClassifier {\n");
// globalInfo
result.append("\n");
result.append(" /**\n");
result.append(" * Returns only the toString() method.\n");
result.append(" *\n");
result.append(" * @return a string describing the classifier\n");
result.append(" */\n");
result.append(" public String globalInfo() {\n");
result.append(" return toString();\n");
result.append(" }\n");
// getCapabilities
result.append("\n");
result.append(" /**\n");
result.append(" * Returns the capabilities of this classifier.\n");
result.append(" *\n");
result.append(" * @return the capabilities\n");
result.append(" */\n");
result.append(" public Capabilities getCapabilities() {\n");
// NOTE(review): this cast assumes the Sourcable is also a Classifier;
// it fails with a ClassCastException otherwise
result.append(((Classifier) classifier).getCapabilities().toSource("result", 4));
result.append(" return result;\n");
result.append(" }\n");
// buildClassifier
result.append("\n");
result.append(" /**\n");
result.append(" * only checks the data against its capabilities.\n");
result.append(" *\n");
result.append(" * @param i the training data\n");
result.append(" */\n");
result.append(" public void buildClassifier(Instances i) throws Exception {\n");
result.append(" // can classifier handle the data?\n");
result.append(" getCapabilities().testWithFail(i);\n");
result.append(" }\n");
// classifyInstance: converts the instance into an Object[] (String for
// nominal, Double for numeric, null for missing values and the class)
// and hands it to the generated static classify method
result.append("\n");
result.append(" /**\n");
result.append(" * Classifies the given instance.\n");
result.append(" *\n");
result.append(" * @param i the instance to classify\n");
result.append(" * @return the classification result\n");
result.append(" */\n");
result.append(" public double classifyInstance(Instance i) throws Exception {\n");
result.append(" Object[] s = new Object[i.numAttributes()];\n");
result.append(" \n");
result.append(" for (int j = 0; j < s.length; j++) {\n");
result.append(" if (!i.isMissing(j)) {\n");
result.append(" if (i.attribute(j).isNominal())\n");
result.append(" s[j] = new String(i.stringValue(j));\n");
result.append(" else if (i.attribute(j).isNumeric())\n");
result.append(" s[j] = new Double(i.value(j));\n");
result.append(" }\n");
result.append(" }\n");
result.append(" \n");
result.append(" // set class value to missing\n");
result.append(" s[i.classIndex()] = null;\n");
result.append(" \n");
result.append(" return " + className + ".classify(s);\n");
result.append(" }\n");
// getRevision
result.append("\n");
result.append(" /**\n");
result.append(" * Returns the revision string.\n");
result.append(" * \n");
result.append(" * @return the revision\n");
result.append(" */\n");
result.append(" public String getRevision() {\n");
result.append(" return RevisionUtils.extract(\"1.0\");\n");
result.append(" }\n");
// toString: the generated string literal embeds the name of the source
// classifier's class and the Weka version at generation time
result.append("\n");
result.append(" /**\n");
result.append(" * Returns only the classnames and what classifier it is based on.\n");
result.append(" *\n");
result.append(" * @return a short description\n");
result.append(" */\n");
result.append(" public String toString() {\n");
result.append(" return \"Auto-generated classifier wrapper, based on "
+ classifier.getClass().getName() + " (generated with Weka " + Version.VERSION + ").\\n"
+ "\" + this.getClass().getName() + \"/" + className + "\";\n");
result.append(" }\n");
// main
result.append("\n");
result.append(" /**\n");
result.append(" * Runs the classfier from commandline.\n");
result.append(" *\n");
result.append(" * @param args the commandline arguments\n");
result.append(" */\n");
result.append(" public static void main(String args[]) {\n");
result.append(" runClassifier(new WekaWrapper(), args);\n");
result.append(" }\n");
result.append("}\n");
// actual classifier code
result.append("\n");
result.append(staticClassifier);
return result.toString();
}
/**
 * Gets the number of test instances that had a known class value
 * (actually the sum of the weights of test instances with known
 * class value).
 *
 * @return the number of test instances with known class
 */
public final double numInstances() {
  return this.m_WithClass;
}
/**
 * Gets the coverage of the test cases by the predicted regions at
 * the confidence level specified when evaluation was performed, as a
 * percentage of the weight of the instances with known class.
 *
 * @return the coverage of the test cases by the predicted regions, or
 * NaN if coverage statistics are not available
 */
public final double coverageOfTestCasesByPredictedRegions() {
  return m_CoverageStatisticsAvailable
    ? 100 * m_TotalCoverage / m_WithClass
    : Double.NaN;
}
/**
 * Gets the average size of the predicted regions, relative to the
 * range of the target in the training data, at the confidence level
 * specified when evaluation was performed.
 *
 * @return the average size of the predicted regions, or NaN if priors
 * are not used or coverage statistics are not available
 */
public final double sizeOfPredictedRegions() {
  final boolean computable = !m_NoPriors && m_CoverageStatisticsAvailable;
  if (computable) {
    return 100 * m_TotalSizeOfRegions / m_WithClass;
  }
  return Double.NaN;
}
/**
 * Gets the number of instances incorrectly classified (that is, for
 * which an incorrect prediction was made). This is actually the sum of
 * the weights of these instances.
 *
 * @return the number of incorrectly classified instances
 */
public final double incorrect() {
  return this.m_Incorrect;
}
/**
 * Gets the percentage of instances incorrectly classified (that is,
 * for which an incorrect prediction was made), relative to the weight
 * of the instances with known class.
 *
 * @return the percent of incorrectly classified instances
 * (between 0 and 100)
 */
public final double pctIncorrect() {
  final double scaled = 100 * m_Incorrect;
  return scaled / m_WithClass;
}
/**
 * Gets the total cost, that is, the cost of each prediction times the
 * weight of the instance, summed over all instances.
 *
 * @return the total cost
 */
public final double totalCost() {
  return this.m_TotalCost;
}
/**
 * Gets the average cost, that is, the total cost divided by the weight
 * of the instances with known class.
 *
 * @return the average cost.
 */
public final double avgCost() {
  final double total = m_TotalCost;
  return total / m_WithClass;
}
/**
 * Gets the number of instances correctly classified (that is, for
 * which a correct prediction was made). This is actually the sum of the
 * weights of these instances.
 *
 * @return the number of correctly classified instances
 */
public final double correct() {
  return this.m_Correct;
}
/**
 * Gets the percentage of instances correctly classified (that is, for
 * which a correct prediction was made), relative to the weight of the
 * instances with known class.
 *
 * @return the percent of correctly classified instances (between 0 and 100)
 */
public final double pctCorrect() {
  final double scaled = 100 * m_Correct;
  return scaled / m_WithClass;
}
/**
 * Gets the number of instances not classified (that is, for
 * which no prediction was made by the classifier). This is actually the
 * sum of the weights of these instances.
 *
 * @return the number of unclassified instances
 */
public final double unclassified() {
  return this.m_Unclassified;
}
/**
 * Gets the percentage of instances not classified (that is, for
 * which no prediction was made by the classifier), relative to the
 * weight of the instances with known class.
 *
 * @return the percent of unclassified instances (between 0 and 100)
 */
public final double pctUnclassified() {
  final double scaled = 100 * m_Unclassified;
  return scaled / m_WithClass;
}
/**
 * Returns the estimated error rate or the root mean squared error
 * (if the class is numeric). If a cost matrix was given this
 * error rate gives the average cost.
 *
 * @return the estimated error rate (between 0 and 1, or between 0 and
 * maximum cost)
 */
public final double errorRate() {
  if (m_ClassIsNominal) {
    // With a cost matrix the average cost replaces the plain error rate
    return (m_CostMatrix != null) ? avgCost() : m_Incorrect / m_WithClass;
  }
  // Numeric class: root mean squared error over the classified instances
  return Math.sqrt(m_SumSqrErr / (m_WithClass - m_Unclassified));
}
/**
 * Returns value of kappa statistic if class is nominal. The statistic is
 * computed from the confusion matrix; 1 is returned whenever the chance
 * agreement is not smaller than 1.
 *
 * @return the value of the kappa statistic
 */
public final double kappa() {
  final int size = m_ConfusionMatrix.length;
  final double[] rowTotals = new double[size];
  final double[] columnTotals = new double[size];
  double total = 0;
  double observed = 0;

  // Collect the marginal totals, the overall total and the diagonal
  for (int row = 0; row < size; row++) {
    for (int col = 0; col < size; col++) {
      final double cell = m_ConfusionMatrix[row][col];
      rowTotals[row] += cell;
      columnTotals[col] += cell;
      total += cell;
    }
    observed += m_ConfusionMatrix[row][row];
  }

  double chanceAgreement = 0;
  for (int cls = 0; cls < size; cls++) {
    chanceAgreement += (rowTotals[cls] * columnTotals[cls]);
  }
  chanceAgreement /= (total * total);
  observed /= total;

  return (chanceAgreement < 1)
    ? (observed - chanceAgreement) / (1 - chanceAgreement)
    : 1;
}
/**
 * Returns the correlation coefficient if the class is numeric. The result
 * is 0 when the product of the two variance terms is not positive.
 *
 * @return the correlation coefficient
 * @throws Exception if class is not numeric
 */
public final double correlationCoefficient() throws Exception {
  if (m_ClassIsNominal) {
    throw new Exception("Can't compute correlation coefficient: class is nominal!");
  }

  // Weight of the instances for which a prediction was actually made
  final double evaluated = m_WithClass - m_Unclassified;

  final double varActual =
    m_SumSqrClass - m_SumClass * m_SumClass / evaluated;
  final double varPredicted =
    m_SumSqrPredicted - m_SumPredicted * m_SumPredicted / evaluated;
  final double varProd =
    m_SumClassPredicted - m_SumClass * m_SumPredicted / evaluated;

  final double varianceProduct = varActual * varPredicted;
  if (varianceProduct <= 0) {
    return 0.0;
  }
  return varProd / Math.sqrt(varianceProduct);
}
/**
 * Returns the mean absolute error. Refers to the error of the
 * predicted values for numeric classes, and the error of the
 * predicted probability distribution for nominal classes. The mean is
 * taken over the weight of the classified instances.
 *
 * @return the mean absolute error
 */
public final double meanAbsoluteError() {
  final double evaluated = m_WithClass - m_Unclassified;
  return m_SumAbsErr / evaluated;
}
/**
 * Returns the mean absolute error of the prior, i.e. the accumulated prior
 * absolute error divided by the weight of the instances with a class.
 *
 * @return the mean absolute error of the prior, or NaN if priors are
 *         disabled
 */
public final double meanPriorAbsoluteError() {
  return m_NoPriors ? Double.NaN : m_SumPriorAbsErr / m_WithClass;
}
/**
 * Returns the relative absolute error: the mean absolute error expressed
 * as a percentage of the mean absolute error of the prior.
 *
 * @return the relative absolute error, or NaN if priors are disabled
 * @throws Exception if it can't be computed
 */
public final double relativeAbsoluteError() throws Exception {
  if (m_NoPriors) {
    return Double.NaN;
  }
  final double schemeError = meanAbsoluteError();
  final double priorError = meanPriorAbsoluteError();
  return 100 * schemeError / priorError;
}
/**
 * Returns the root mean squared error: the square root of the accumulated
 * squared error divided by the weight of the classified instances.
 *
 * @return the root mean squared error
 */
public final double rootMeanSquaredError() {
  final double meanSquaredError = m_SumSqrErr / (m_WithClass - m_Unclassified);
  return Math.sqrt(meanSquaredError);
}
/**
 * Returns the root mean prior squared error: the square root of the
 * accumulated prior squared error divided by the weight of the instances
 * with a class.
 *
 * @return the root mean prior squared error, or NaN if priors are disabled
 */
public final double rootMeanPriorSquaredError() {
  return m_NoPriors
    ? Double.NaN
    : Math.sqrt(m_SumPriorSqrErr / m_WithClass);
}
/**
 * Returns the root relative squared error: the root mean squared error
 * expressed as a percentage of the root mean prior squared error.
 *
 * @return the root relative squared error, or NaN if priors are disabled
 */
public final double rootRelativeSquaredError() {
  if (m_NoPriors) {
    return Double.NaN;
  }
  final double schemeError = rootMeanSquaredError();
  final double priorError = rootMeanPriorSquaredError();
  return 100.0 * schemeError / priorError;
}
/**
 * Calculates the entropy (in bits) of the prior class distribution, where
 * each class probability is its prior count divided by the prior sum.
 *
 * @return the entropy of the prior distribution, or NaN if priors are
 *         disabled
 * @throws Exception if the class is not nominal
 */
public final double priorEntropy() throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Can't compute entropy of class prior: "
      + "class numeric!");
  }
  if (m_NoPriors) {
    return Double.NaN;
  }

  double bits = 0;
  for (int cls = 0; cls < m_NumClasses; cls++) {
    final double probability = m_ClassPriors[cls] / m_ClassPriorsSum;
    bits -= probability * Utils.log2(probability);
  }
  return bits;
}
/**
 * Returns the total Kononenko &amp; Bratko Information score in bits, as
 * accumulated so far.
 *
 * @return the K&amp;B information score, or NaN if priors are disabled
 * @throws Exception if the class is not nominal
 */
public final double KBInformation() throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Can't compute K&B Info score: "
      + "class numeric!");
  }
  return m_NoPriors ? Double.NaN : m_SumKBInfo;
}
/**
 * Returns the Kononenko &amp; Bratko Information score in bits per
 * instance: the accumulated score divided by the weight of the classified
 * instances.
 *
 * @return the K&amp;B information score per instance, or NaN if priors are
 *         disabled
 * @throws Exception if the class is not nominal
 */
public final double KBMeanInformation() throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Can't compute K&B Info score: class numeric!");
  }
  if (m_NoPriors) {
    return Double.NaN;
  }
  final double classified = m_WithClass - m_Unclassified;
  return m_SumKBInfo / classified;
}
/**
 * Returns the Kononenko &amp; Bratko Relative Information score: the total
 * K&amp;B information expressed as a percentage of the prior entropy.
 *
 * @return the K&amp;B relative information score, or NaN if priors are
 *         disabled
 * @throws Exception if the class is not nominal
 */
public final double KBRelativeInformation() throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Can't compute K&B Info score: "
      + "class numeric!");
  }
  if (m_NoPriors) {
    return Double.NaN;
  }
  final double information = KBInformation();
  final double entropy = priorEntropy();
  return 100.0 * information / entropy;
}
/**
 * Returns the total entropy for the null model.
 *
 * @return the total null model entropy, or NaN if priors are disabled or
 *         complexity statistics are not available
 */
public final double SFPriorEntropy() {
  final boolean computable = !m_NoPriors && m_ComplexityStatisticsAvailable;
  return computable ? m_SumPriorEntropy : Double.NaN;
}
/**
 * Returns the entropy per instance for the null model: the total null
 * model entropy divided by the weight of the instances with a class.
 *
 * @return the null model entropy per instance, or NaN if priors are
 *         disabled or complexity statistics are not available
 */
public final double SFMeanPriorEntropy() {
  final boolean computable = !m_NoPriors && m_ComplexityStatisticsAvailable;
  if (!computable) {
    return Double.NaN;
  }
  return m_SumPriorEntropy / m_WithClass;
}
/**
 * Returns the total entropy for the scheme.
 *
 * @return the total scheme entropy, or NaN if complexity statistics are
 *         not available
 */
public final double SFSchemeEntropy() {
  return m_ComplexityStatisticsAvailable ? m_SumSchemeEntropy : Double.NaN;
}
/**
 * Returns the entropy per instance for the scheme: the total scheme
 * entropy divided by the weight of the classified instances.
 *
 * @return the scheme entropy per instance, or NaN if complexity statistics
 *         are not available
 */
public final double SFMeanSchemeEntropy() {
  if (m_ComplexityStatisticsAvailable) {
    final double classified = m_WithClass - m_Unclassified;
    return m_SumSchemeEntropy / classified;
  }
  return Double.NaN;
}
/**
 * Returns the total SF, which is the null model entropy minus the scheme
 * entropy.
 *
 * @return the total SF, or NaN if priors are disabled or complexity
 *         statistics are not available
 */
public final double SFEntropyGain() {
  final boolean computable = !m_NoPriors && m_ComplexityStatisticsAvailable;
  if (!computable) {
    return Double.NaN;
  }
  return m_SumPriorEntropy - m_SumSchemeEntropy;
}
/**
 * Returns the SF per instance: the null model entropy minus the scheme
 * entropy, divided by the weight of the classified instances.
 *
 * @return the SF per instance, or NaN if priors are disabled or complexity
 *         statistics are not available
 */
public final double SFMeanEntropyGain() {
  final boolean computable = !m_NoPriors && m_ComplexityStatisticsAvailable;
  if (!computable) {
    return Double.NaN;
  }
  final double totalGain = m_SumPriorEntropy - m_SumSchemeEntropy;
  return totalGain / (m_WithClass - m_Unclassified);
}
/**
 * Outputs the cumulative margin distribution as a string suitable
 * for input for gnuplot or similar package. Each output line holds a
 * margin in [-1, 1] and the cumulative percentage (relative to the weight
 * of the instances with a class) of the margin counts up to that margin.
 * Margins with a zero count are skipped, except that a "-1 / 0" line is
 * emitted when the very first bucket is empty.
 *
 * @return the cumulative margin distribution
 * @throws Exception if the class attribute is not nominal
 */
public String toCumulativeMarginDistributionString() throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Class must be nominal for margin distributions");
  }

  // A buffer avoids re-copying the whole result on every appended line.
  StringBuffer result = new StringBuffer();
  double cumulativeCount = 0;
  for (int i = 0; i <= k_MarginResolution; i++) {
    if (m_MarginCounts[i] != 0) {
      cumulativeCount += m_MarginCounts[i];
      // Map the bucket index linearly onto the margin range [-1, 1].
      double margin = (double) i * 2.0 / k_MarginResolution - 1.0;
      result.append(Utils.doubleToString(margin, 7, 3)).append(' ')
        .append(Utils.doubleToString(cumulativeCount * 100
                                     / m_WithClass, 7, 3)).append('\n');
    } else if (i == 0) {
      // Anchor the curve at (-1, 0) when nothing falls into the first bucket.
      result.append(Utils.doubleToString(-1.0, 7, 3)).append(' ')
        .append(Utils.doubleToString(0, 7, 3)).append('\n');
    }
  }
  return result.toString();
}
/**
 * Produces the summary with an empty title and without complexity
 * statistics by delegating to {@link #toSummaryString(String, boolean)}.
 *
 * @return a summary description of the classifier evaluation
 */
public String toSummaryString() {
  final String noTitle = "";
  final boolean withComplexityStatistics = false;
  return toSummaryString(noTitle, withComplexityStatistics);
}
/**
 * Produces the summary under the default title by delegating to
 * {@link #toSummaryString(String, boolean)}.
 *
 * @param printComplexityStatistics if true, complexity statistics are
 * returned as well
 * @return the summary string
 */
public String toSummaryString(boolean printComplexityStatistics) {
  final String defaultTitle = "=== Summary ===\n";
  return toSummaryString(defaultTitle, printComplexityStatistics);
}
/**
 * Outputs the performance statistics in summary form.
 * <p>
 * When at least some instances had a class value, the summary lists
 * (nominal class) the number and percentage of correctly and incorrectly
 * classified instances, the kappa statistic and, if a cost matrix is set,
 * total and average cost; or (numeric class) the correlation coefficient.
 * These are followed by the error measures and, where available, the
 * complexity and coverage statistics. The summary then reports the
 * unclassified instances (if any), the total number of instances and the
 * number of instances (if any) that had no class value provided.
 * <p>
 * If complexity statistics are requested while priors are disabled, the
 * request is dropped and a warning is written to <code>System.err</code>.
 * An exception raised by one of the statistics is not propagated: a
 * message is written to <code>System.err</code> and the text assembled up
 * to that point is returned.
 *
 * @param title the title for the statistics
 * @param printComplexityStatistics if true, complexity statistics are
 * returned as well
 * @return the summary as a String
 */
public String toSummaryString(String title,
boolean printComplexityStatistics) {
StringBuffer text = new StringBuffer();
// Complexity statistics depend on the priors; without them the request
// is silently downgraded (apart from the warning on stderr).
if (printComplexityStatistics && m_NoPriors) {
printComplexityStatistics = false;
System.err.println("Priors disabled, cannot print complexity statistics!");
}
text.append(title + "\n");
try {
// The statistics section is only produced when some instances had a class value.
if (m_WithClass > 0) {
if (m_ClassIsNominal) {
// Nominal class: counts and percentages, kappa, optional cost and
// K&B information figures.
text.append("Correctly Classified Instances ");
text.append(Utils.doubleToString(correct(), 12, 4) + " " +
Utils.doubleToString(pctCorrect(),
12, 4) + " %\n");
text.append("Incorrectly Classified Instances ");
text.append(Utils.doubleToString(incorrect(), 12, 4) + " " +
Utils.doubleToString(pctIncorrect(),
12, 4) + " %\n");
text.append("Kappa statistic ");
text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");
if (m_CostMatrix != null) {
text.append("Total Cost ");
text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
text.append("Average Cost ");
text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
}
if (printComplexityStatistics) {
text.append("K&B Relative Info Score ");
text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4)
+ " %\n");
text.append("K&B Information Score ");
text.append(Utils.doubleToString(KBInformation(), 12, 4)
+ " bits");
text.append(Utils.doubleToString(KBMeanInformation(), 12, 4)
+ " bits/instance\n");
}
} else {
// Numeric class: the correlation coefficient is the only figure
// specific to this branch.
text.append("Correlation coefficient ");
text.append(Utils.doubleToString(correlationCoefficient(), 12 , 4) +
"\n");
}
// Sf complexity figures: shown for both class types, but only when
// requested and available.
if (printComplexityStatistics && m_ComplexityStatisticsAvailable) {
text.append("Class complexity | order 0 ");
text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4)
+ " bits");
text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4)
+ " bits/instance\n");
text.append("Class complexity | scheme ");
text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4)
+ " bits");
text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4)
+ " bits/instance\n");
text.append("Complexity improvement (Sf) ");
text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4)
+ " bits/instance\n");
}
// Absolute error measures are always reported.
text.append("Mean absolute error ");
text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4)
+ "\n");
text.append("Root mean squared error ");
text.append(Utils.
doubleToString(rootMeanSquaredError(), 12, 4)
+ "\n");
// Relative error measures are expressed against the prior, so they
// are skipped when priors are disabled.
if (!m_NoPriors) {
text.append("Relative absolute error ");
text.append(Utils.doubleToString(relativeAbsoluteError(),
12, 4) + " %\n");
text.append("Root relative squared error ");
text.append(Utils.doubleToString(rootRelativeSquaredError(),
12, 4) + " %\n");
}
if (m_CoverageStatisticsAvailable) {
text.append("Coverage of cases (" + Utils.doubleToString(m_ConfLevel, 4, 2) + " level) ");
text.append(Utils.doubleToString(coverageOfTestCasesByPredictedRegions(),
12, 4) + " %\n");
// The region size is only printed when priors are enabled.
if (!m_NoPriors) {
text.append("Mean rel. region size (" + Utils.doubleToString(m_ConfLevel, 4, 2) + " level) ");
text.append(Utils.doubleToString(sizeOfPredictedRegions(), 12, 4) + " %\n");
}
}
}
// Instance counts: unclassified (only if any), total with a class value,
// and those ignored because their class value was missing (only if any).
if (Utils.gr(unclassified(), 0)) {
text.append("UnClassified Instances ");
text.append(Utils.doubleToString(unclassified(), 12,4) + " " +
Utils.doubleToString(pctUnclassified(),
12, 4) + " %\n");
}
text.append("Total Number of Instances ");
text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
if (m_MissingClass > 0) {
text.append("Ignored Class Unknown Instances ");
text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
}
} catch (Exception ex) {
// Not expected in normal operation: the nominal-only statistics are
// requested only in the nominal branch and the correlation coefficient
// only in the numeric branch. The exception itself is discarded and the
// partially built summary is still returned.
System.err.println("Arggh - Must be a bug in Evaluation class");
}
return text.toString();
}
/**
 * Renders the confusion matrix under the default title by delegating to
 * {@link #toMatrixString(String)}.
 *
 * @return the confusion matrix as a string
 * @throws Exception if the class is numeric
 */
public String toMatrixString() throws Exception {
  final String defaultTitle = "=== Confusion Matrix ===\n";
  return toMatrixString(defaultTitle);
}
/**
 * Outputs the performance statistics as a classification confusion
 * matrix. For each class value, shows the distribution of
 * predicted class values. Rows are labelled with short letter IDs that are
 * also used as column headers; entries are printed with two decimals as
 * soon as any entry has a fractional part of at least about 0.01,
 * otherwise as whole numbers.
 *
 * @param title the title for the confusion matrix
 * @return the confusion matrix as a String
 * @throws Exception if the class is numeric
 */
public String toMatrixString(String title) throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Evaluation: No confusion matrix possible!");
  }

  StringBuffer text = new StringBuffer();
  char [] IDChars = {'a','b','c','d','e','f','g','h','i','j',
                     'k','l','m','n','o','p','q','r','s','t',
                     'u','v','w','x','y','z'};
  boolean fractional = false;

  // Find the maximum value in the matrix
  // and check for fractional display requirement
  double maxval = 0;
  for (int i = 0; i < m_NumClasses; i++) {
    for (int j = 0; j < m_NumClasses; j++) {
      double entry = m_ConfusionMatrix[i][j];

      // For the width estimate a negative entry is scaled by ten so that
      // one extra character is reserved for its sign.
      double current = entry;
      if (current < 0) {
        current *= -10;
      }
      if (current > maxval) {
        maxval = current;
      }

      // Distance to the nearest integer, taken on the unscaled entry.
      // The absolute value matters: Math.rint rounds 2.7 up to 3, giving a
      // negative difference whose logarithm is NaN, so such entries were
      // previously never recognised as fractional.
      double fract = Math.abs(entry - Math.rint(entry));
      if (!fractional && ((Math.log(fract) / Math.log(10)) >= -2)) {
        fractional = true;
      }
    }
  }

  // Column width: enough for the widest entry (plus room for the decimals
  // when fractional) or for the longest class ID, whichever is larger.
  int IDWidth = 1 + Math.max((int)(Math.log(maxval) / Math.log(10)
                                   + (fractional ? 3 : 0)),
                             (int)(Math.log(m_NumClasses) /
                                   Math.log(IDChars.length)));

  text.append(title).append("\n");
  for (int i = 0; i < m_NumClasses; i++) {
    if (fractional) {
      text.append(" ").append(num2ShortID(i,IDChars,IDWidth - 3))
        .append(" ");
    } else {
      text.append(" ").append(num2ShortID(i,IDChars,IDWidth));
    }
  }
  text.append(" <-- classified as\n");
  for (int i = 0; i < m_NumClasses; i++) {
    for (int j = 0; j < m_NumClasses; j++) {
      text.append(" ").append(
        Utils.doubleToString(m_ConfusionMatrix[i][j],
                             IDWidth,
                             (fractional ? 2 : 0)));
    }
    text.append(" | ").append(num2ShortID(i,IDChars,IDWidth))
      .append(" = ").append(m_ClassNames[i]).append("\n");
  }
  return text.toString();
}
/**
 * Generates the per-class accuracy breakdown under the default title by
 * delegating to {@link #toClassDetailsString(String)}. The breakdown
 * incorporates various information-retrieval statistics, such as
 * true/false positive rate, precision/recall/F-Measure. Should be
 * useful for ROC curves, recall/precision curves.
 *
 * @return the statistics presented as a string
 * @throws Exception if class is not nominal
 */
public String toClassDetailsString() throws Exception {
  final String defaultTitle = "=== Detailed Accuracy By Class ===\n";
  return toClassDetailsString(defaultTitle);
}
/**
 * Generates a breakdown of the accuracy for each class: one row per class
 * with true/false positive rate, precision, recall, F-Measure and ROC
 * area, followed by a row of the class-size weighted averages. A missing
 * ROC area is shown as a question mark. Should be useful for ROC curves,
 * recall/precision curves.
 *
 * @param title the title to prepend the stats string with
 * @return the statistics presented as a string
 * @throws Exception if class is not nominal
 */
public String toClassDetailsString(String title) throws Exception {
  if (!m_ClassIsNominal) {
    throw new Exception("Evaluation: No per class statistics possible!");
  }

  StringBuffer report = new StringBuffer();
  report.append(title)
    .append("\n TP Rate FP Rate")
    .append(" Precision Recall")
    .append(" F-Measure ROC Area Class\n");

  // One row per class.
  for (int cls = 0; cls < m_NumClasses; cls++) {
    double[] rates = {
      truePositiveRate(cls),
      falsePositiveRate(cls),
      precision(cls),
      recall(cls),
      fMeasure(cls)
    };
    report.append(" ");
    for (int k = 0; k < rates.length; k++) {
      report.append(Utils.doubleToString(rates[k], 7, 3)).append(" ");
    }

    double rocArea = areaUnderROC(cls);
    String rocColumn = Utils.isMissingValue(rocArea)
      ? " ? "
      : Utils.doubleToString(rocArea, 7, 3);
    report.append(rocColumn).append(" ");

    report.append(m_ClassNames[cls]).append('\n');
  }

  // Closing row with the weighted averages, in the same column order.
  double[] averages = {
    weightedTruePositiveRate(),
    weightedFalsePositiveRate(),
    weightedPrecision(),
    weightedRecall(),
    weightedFMeasure(),
    weightedAreaUnderROC()
  };
  report.append("Weighted Avg. ");
  for (int k = 0; k < averages.length; k++) {
    if (k > 0) {
      report.append(" ");
    }
    report.append(Utils.doubleToString(averages[k], 7, 3));
  }
  report.append("\n");
  return report.toString();
}
/**
 * Calculate the number of true positives with respect to a particular class.
 * This is defined as
 * <pre>
 * correctly classified positives
 * </pre>
 * i.e. the diagonal entry of the confusion matrix for that class.
 *
 * @param classIndex the index of the class to consider as "positive"
 * @return the number of true positives; 0 if classIndex is not a valid
 *         class index
 */
public double numTruePositives(int classIndex) {
  double hits = 0;
  // Only the diagonal cell contributes; an out-of-range index matches nothing.
  if ((classIndex >= 0) && (classIndex < m_NumClasses)) {
    hits += m_ConfusionMatrix[classIndex][classIndex];
  }
  return hits;
}

/**
 * Calculate the true positive rate with respect to a particular class.
 * This is defined as
 *
* correctly classified positives * ------------------------------ * total positives ** * @param classIndex the index of the class to consider as "positive" * @return the true positive rate */ public double truePositiveRate(int classIndex) { double correct = 0, total = 0; for (int j = 0; j < m_NumClasses; j++) { if (j == classIndex) { correct += m_ConfusionMatrix[classIndex][j]; } total += m_ConfusionMatrix[classIndex][j]; } if (total == 0) { return 0; } return correct / total; } /** * Calculates the weighted (by class size) true positive rate. * * @return the weighted true positive rate. */ public double weightedTruePositiveRate() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double truePosTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = truePositiveRate(i); truePosTotal += (temp * classCounts[i]); } return truePosTotal / classCountSum; } /** * Calculate the number of true negatives with respect to a particular class. * This is defined as *
* correctly classified negatives
*
* @param classIndex the index of the class to consider as "positive"
* @return the number of true negatives
*/
public double numTrueNegatives(int classIndex) {
  double negatives = 0;
  for (int row = 0; row < m_NumClasses; row++) {
    if (row == classIndex) {
      continue;
    }
    for (int col = 0; col < m_NumClasses; col++) {
      if (col == classIndex) {
        continue;
      }
      negatives += m_ConfusionMatrix[row][col];
    }
  }
  return negatives;
}

/**
 * Calculate the true negative rate with respect to a particular class.
 * This is defined as
 *
* correctly classified negatives * ------------------------------ * total negatives ** * @param classIndex the index of the class to consider as "positive" * @return the true positive rate */ public double trueNegativeRate(int classIndex) { double correct = 0, total = 0; for (int i = 0; i < m_NumClasses; i++) { if (i != classIndex) { for (int j = 0; j < m_NumClasses; j++) { if (j != classIndex) { correct += m_ConfusionMatrix[i][j]; } total += m_ConfusionMatrix[i][j]; } } } if (total == 0) { return 0; } return correct / total; } /** * Calculates the weighted (by class size) true negative rate. * * @return the weighted true negative rate. */ public double weightedTrueNegativeRate() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double trueNegTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = trueNegativeRate(i); trueNegTotal += (temp * classCounts[i]); } return trueNegTotal / classCountSum; } /** * Calculate number of false positives with respect to a particular class. * This is defined as *
* incorrectly classified negatives
*
* @param classIndex the index of the class to consider as "positive"
* @return the number of false positives; 0 if classIndex is not a valid
*         class index
*/
public double numFalsePositives(int classIndex) {
  double falseAlarms = 0;
  // An out-of-range index matches no column, so nothing is counted.
  if ((classIndex < 0) || (classIndex >= m_NumClasses)) {
    return falseAlarms;
  }
  for (int row = 0; row < m_NumClasses; row++) {
    if (row != classIndex) {
      falseAlarms += m_ConfusionMatrix[row][classIndex];
    }
  }
  return falseAlarms;
}

/**
 * Calculate the false positive rate with respect to a particular class.
 * This is defined as
 *
* incorrectly classified negatives * -------------------------------- * total negatives ** * @param classIndex the index of the class to consider as "positive" * @return the false positive rate */ public double falsePositiveRate(int classIndex) { double incorrect = 0, total = 0; for (int i = 0; i < m_NumClasses; i++) { if (i != classIndex) { for (int j = 0; j < m_NumClasses; j++) { if (j == classIndex) { incorrect += m_ConfusionMatrix[i][j]; } total += m_ConfusionMatrix[i][j]; } } } if (total == 0) { return 0; } return incorrect / total; } /** * Calculates the weighted (by class size) false positive rate. * * @return the weighted false positive rate. */ public double weightedFalsePositiveRate() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double falsePosTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = falsePositiveRate(i); falsePosTotal += (temp * classCounts[i]); } return falsePosTotal / classCountSum; } /** * Calculate number of false negatives with respect to a particular class. * This is defined as *
* incorrectly classified positives
*
* @param classIndex the index of the class to consider as "positive"
* @return the number of false negatives; 0 if classIndex is not a valid
*         class index
*/
public double numFalseNegatives(int classIndex) {
  double misses = 0;
  // An out-of-range index matches no row, so nothing is counted.
  if ((classIndex < 0) || (classIndex >= m_NumClasses)) {
    return misses;
  }
  for (int col = 0; col < m_NumClasses; col++) {
    if (col != classIndex) {
      misses += m_ConfusionMatrix[classIndex][col];
    }
  }
  return misses;
}

/**
 * Calculate the false negative rate with respect to a particular class.
 * This is defined as
 *
* incorrectly classified positives * -------------------------------- * total positives ** * @param classIndex the index of the class to consider as "positive" * @return the false positive rate */ public double falseNegativeRate(int classIndex) { double incorrect = 0, total = 0; for (int i = 0; i < m_NumClasses; i++) { if (i == classIndex) { for (int j = 0; j < m_NumClasses; j++) { if (j != classIndex) { incorrect += m_ConfusionMatrix[i][j]; } total += m_ConfusionMatrix[i][j]; } } } if (total == 0) { return 0; } return incorrect / total; } /** * Calculates the weighted (by class size) false negative rate. * * @return the weighted false negative rate. */ public double weightedFalseNegativeRate() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double falseNegTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = falseNegativeRate(i); falseNegTotal += (temp * classCounts[i]); } return falseNegTotal / classCountSum; } /** * Calculate the recall with respect to a particular class. * This is defined as *
* correctly classified positives
* ------------------------------
* total positives
*
* (Which is also the same as the truePositiveRate.)
*
* @param classIndex the index of the class to consider as "positive"
* @return the recall
*/
public double recall(int classIndex) {
  final double sameAsTruePositiveRate = truePositiveRate(classIndex);
  return sameAsTruePositiveRate;
}

/**
 * Calculates the weighted (by class size) recall, which is the weighted
 * true positive rate.
 *
 * @return the weighted recall.
 */
public double weightedRecall() {
  final double sameAsWeightedTruePositiveRate = weightedTruePositiveRate();
  return sameAsWeightedTruePositiveRate;
}

/**
 * Calculate the precision with respect to a particular class.
 * This is defined as
 *
* correctly classified positives * ------------------------------ * total predicted as positive ** * @param classIndex the index of the class to consider as "positive" * @return the precision */ public double precision(int classIndex) { double correct = 0, total = 0; for (int i = 0; i < m_NumClasses; i++) { if (i == classIndex) { correct += m_ConfusionMatrix[i][classIndex]; } total += m_ConfusionMatrix[i][classIndex]; } if (total == 0) { return 0; } return correct / total; } /** * Calculates the weighted (by class size) false precision. * * @return the weighted precision. */ public double weightedPrecision() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double precisionTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = precision(i); precisionTotal += (temp * classCounts[i]); } return precisionTotal / classCountSum; } /** * Calculate the F-Measure with respect to a particular class. * This is defined as *
* 2 * recall * precision * ---------------------- * recall + precision ** * @param classIndex the index of the class to consider as "positive" * @return the F-Measure */ public double fMeasure(int classIndex) { double precision = precision(classIndex); double recall = recall(classIndex); if ((precision + recall) == 0) { return 0; } return 2 * precision * recall / (precision + recall); } /** * Calculates the macro weighted (by class size) average * F-Measure. * * @return the weighted F-Measure. */ public double weightedFMeasure() { double[] classCounts = new double[m_NumClasses]; double classCountSum = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClasses; j++) { classCounts[i] += m_ConfusionMatrix[i][j]; } classCountSum += classCounts[i]; } double fMeasureTotal = 0; for(int i = 0; i < m_NumClasses; i++) { double temp = fMeasure(i); fMeasureTotal += (temp * classCounts[i]); } return fMeasureTotal / classCountSum; } /** * Unweighted macro-averaged F-measure. If some classes not present in the * test set, they're just skipped (since recall is undefined there anyway) . * * @return unweighted macro-averaged F-measure. * */ public double unweightedMacroFmeasure() { weka.experiment.Stats rr = new weka.experiment.Stats(); for (int c = 0; c < m_NumClasses; c++) { // skip if no testing positive cases of this class if (numTruePositives(c)+numFalseNegatives(c) > 0) { rr.add(fMeasure(c)); } } rr.calculateDerived(); return rr.mean; } /** * Unweighted micro-averaged F-measure. If some classes not present in the * test set, they have no effect. * * Note: if the test set is *single-label*, then this is the same as accuracy. * * @return unweighted micro-averaged F-measure. */ public double unweightedMicroFmeasure() { double tp = 0; double fn = 0; double fp = 0; for (int c = 0; c < m_NumClasses; c++) { tp += numTruePositives(c); fn += numFalseNegatives(c); fp += numFalsePositives(c); } return 2*tp / (2*tp + fn + fp); } /** * Sets the class prior probabilities. 
* * @param train the training instances used to determine the prior probabilities * @throws Exception if the class attribute of the instances is not set */ public void setPriors(Instances train) throws Exception { m_NoPriors = false; if (!m_ClassIsNominal) { m_NumTrainClassVals = 0; m_TrainClassVals = null; m_TrainClassWeights = null; m_PriorEstimator = null; m_MinTarget = Double.MAX_VALUE; m_MaxTarget = -Double.MAX_VALUE; for (int i = 0; i < train.numInstances(); i++) { Instance currentInst = train.instance(i); if (!currentInst.classIsMissing()) { addNumericTrainClass(currentInst.classValue(), currentInst.weight()); } } m_ClassPriors[0] = m_ClassPriorsSum = 0; for (int i = 0; i < train.numInstances(); i++) { if (!train.instance(i).classIsMissing()) { m_ClassPriors[0] += train.instance(i).classValue() * train.instance(i).weight(); m_ClassPriorsSum += train.instance(i).weight(); } } } else { for (int i = 0; i < m_NumClasses; i++) { m_ClassPriors[i] = 1; } m_ClassPriorsSum = m_NumClasses; for (int i = 0; i < train.numInstances(); i++) { if (!train.instance(i).classIsMissing()) { m_ClassPriors[(int)train.instance(i).classValue()] += train.instance(i).weight(); m_ClassPriorsSum += train.instance(i).weight(); } } m_MaxTarget = m_NumClasses; m_MinTarget = 0; } } /** * Get the current weighted class counts. * * @return the weighted class counts */ public double [] getClassPriors() { return m_ClassPriors; } /** * Updates the class prior probabilities or the mean respectively (when incrementally * training). 
* * @param instance the new training instance seen * @throws Exception if the class of the instance is not set */
public void updatePriors(Instance instance) throws Exception {
  // Instances without a class value leave the priors untouched.
  if (!instance.classIsMissing()) {
    if (!m_ClassIsNominal) {
      // Numeric class: register the value with the numeric training
      // statistics and extend the weighted sum of class values.
      addNumericTrainClass(instance.classValue(), instance.weight());
      m_ClassPriors[0] += instance.classValue() * instance.weight();
      m_ClassPriorsSum += instance.weight();
    } else {
      // Nominal class: add the instance weight to the count of its class.
      m_ClassPriors[(int)instance.classValue()] += instance.weight();
      m_ClassPriorsSum += instance.weight();
    }
  }
}
/**
 * Disables the use of priors, e.g., in case of de-serialized schemes
 * that have no access to the original training set, but are evaluated
 * on a test set.
 */
public void useNoPriors() {
  m_NoPriors = true;
}
/**
 * Tests whether the current evaluation object is equal to another
 * evaluation object. Two objects are equal if they are of exactly the same
 * class and their accumulated counts and sums are identical; for a nominal
 * class the confusion matrices are compared as well.
 *
 * @param obj the object to compare against
 * @return true if the two objects are equal
 */
public boolean equals(Object obj) {
  // Null and objects of any other class (including subclasses) never match.
  if ((obj == null) || !(obj.getClass().equals(this.getClass()))) {
    return false;
  }
  Evaluation cmp = (Evaluation) obj;
  // Class type and size.
  if (m_ClassIsNominal != cmp.m_ClassIsNominal) return false;
  if (m_NumClasses != cmp.m_NumClasses) return false;
  // Instance counts.
  if (m_Incorrect != cmp.m_Incorrect) return false;
  if (m_Correct != cmp.m_Correct) return false;
  if (m_Unclassified != cmp.m_Unclassified) return false;
  if (m_MissingClass != cmp.m_MissingClass) return false;
  if (m_WithClass != cmp.m_WithClass) return false;
  // Accumulated error and value sums.
  if (m_SumErr != cmp.m_SumErr) return false;
  if (m_SumAbsErr != cmp.m_SumAbsErr) return false;
  if (m_SumSqrErr != cmp.m_SumSqrErr) return false;
  if (m_SumClass != cmp.m_SumClass) return false;
  if (m_SumSqrClass != cmp.m_SumSqrClass) return false;
  if (m_SumPredicted != cmp.m_SumPredicted) return false;
  if (m_SumSqrPredicted != cmp.m_SumSqrPredicted) return false;
  if (m_SumClassPredicted != cmp.m_SumClassPredicted) return false;
  // For a nominal class the confusion matrices must match entry by entry.
  if (m_ClassIsNominal) {
    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        if
(m_ConfusionMatrix[i][j] != cmp.m_ConfusionMatrix[i][j]) { return false; } } } } return true; } /** * Make up the help string giving all the command line options. * * @param classifier the classifier to include options for * @param globalInfo include the global information string * for the classifier (if available). * @return a string detailing the valid command line options */ protected static String makeOptionString(Classifier classifier, boolean globalInfo) { StringBuffer optionsText = new StringBuffer(""); // General options optionsText.append("\n\nGeneral options:\n\n"); optionsText.append("-h or -help\n"); optionsText.append("\tOutput help information.\n"); optionsText.append("-synopsis or -info\n"); optionsText.append("\tOutput synopsis for classifier (use in conjunction " + " with -h)\n"); optionsText.append("-t