[29] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * CostSensitiveClassifierSplitEvaluator.java |
---|
| 19 | * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand |
---|
| 20 | * |
---|
| 21 | */ |
---|
| 22 | |
---|
| 23 | |
---|
| 24 | package weka.experiment; |
---|
| 25 | |
---|
| 26 | import weka.classifiers.Classifier; |
---|
| 27 | import weka.classifiers.AbstractClassifier; |
---|
| 28 | import weka.classifiers.CostMatrix; |
---|
| 29 | import weka.classifiers.Evaluation; |
---|
| 30 | import weka.core.AdditionalMeasureProducer; |
---|
| 31 | import weka.core.Attribute; |
---|
| 32 | import weka.core.Instance; |
---|
| 33 | import weka.core.Instances; |
---|
| 34 | import weka.core.Option; |
---|
| 35 | import weka.core.RevisionUtils; |
---|
| 36 | import weka.core.Summarizable; |
---|
| 37 | import weka.core.Utils; |
---|
| 38 | |
---|
| 39 | import java.io.BufferedReader; |
---|
| 40 | import java.io.ByteArrayOutputStream; |
---|
| 41 | import java.io.File; |
---|
| 42 | import java.io.FileReader; |
---|
| 43 | import java.io.ObjectOutputStream; |
---|
| 44 | import java.lang.management.ManagementFactory; |
---|
| 45 | import java.lang.management.ThreadMXBean; |
---|
| 46 | import java.util.Enumeration; |
---|
| 47 | import java.util.Vector; |
---|
| 48 | |
---|
| 49 | /** |
---|
| 50 | <!-- globalinfo-start --> |
---|
| 51 | * SplitEvaluator that produces results for a classification scheme on a nominal class attribute, including weighted misclassification costs. |
---|
| 52 | * <p/> |
---|
| 53 | <!-- globalinfo-end --> |
---|
| 54 | * |
---|
| 55 | <!-- options-start --> |
---|
| 56 | * Valid options are: <p/> |
---|
| 57 | * |
---|
| 58 | * <pre> -W &lt;class name&gt; |
---|
| 59 | * The full class name of the classifier. |
---|
| 60 | * eg: weka.classifiers.bayes.NaiveBayes</pre> |
---|
| 61 | * |
---|
| 62 | * <pre> -C &lt;index&gt; |
---|
| 63 | * The index of the class for which IR statistics |
---|
| 64 | * are to be output. (default 1)</pre> |
---|
| 65 | * |
---|
| 66 | * <pre> -I &lt;index&gt; |
---|
| 67 | * The index of an attribute to output in the |
---|
| 68 | * results. This attribute should identify an |
---|
| 69 | * instance in order to know which instances are |
---|
| 70 | * in the test set of a cross validation. if 0 |
---|
| 71 | * no output (default 0).</pre> |
---|
| 72 | * |
---|
| 73 | * <pre> -P |
---|
| 74 | * Add target and prediction columns to the result |
---|
| 75 | * for each fold.</pre> |
---|
| 76 | * |
---|
| 77 | * <pre> |
---|
| 78 | * Options specific to classifier weka.classifiers.rules.ZeroR: |
---|
| 79 | * </pre> |
---|
| 80 | * |
---|
| 81 | * <pre> -D |
---|
| 82 | * If set, classifier is run in debug mode and |
---|
| 83 | * may output additional info to the console</pre> |
---|
| 84 | * |
---|
| 85 | * <pre> -D &lt;directory&gt; |
---|
| 86 | * Name of a directory to search for cost files when loading |
---|
| 87 | * costs on demand (default current directory).</pre> |
---|
| 88 | * |
---|
| 89 | <!-- options-end --> |
---|
| 90 | * |
---|
| 91 | * All options after -- will be passed to the classifier. |
---|
| 92 | * |
---|
| 93 | * @author Len Trigg (len@reeltwo.com) |
---|
| 94 | * @version $Revision: 5987 $ |
---|
| 95 | */ |
---|
| 96 | public class CostSensitiveClassifierSplitEvaluator |
---|
| 97 | extends ClassifierSplitEvaluator { |
---|
| 98 | |
---|
| 99 | /** for serialization */ |
---|
| 100 | static final long serialVersionUID = -8069566663019501276L; |
---|
| 101 | |
---|
| 102 | /** |
---|
| 103 | * The directory used when loading cost files on demand, null indicates |
---|
| 104 | * current directory |
---|
| 105 | */ |
---|
| 106 | protected File m_OnDemandDirectory = new File(System.getProperty("user.dir")); |
---|
| 107 | |
---|
| 108 | /** The length of a result */ |
---|
| 109 | private static final int RESULT_SIZE = 31; |
---|
| 110 | |
---|
| 111 | /** |
---|
| 112 | * Returns a string describing this split evaluator |
---|
| 113 | * @return a description of the split evaluator suitable for |
---|
| 114 | * displaying in the explorer/experimenter gui |
---|
| 115 | */ |
---|
| 116 | public String globalInfo() { |
---|
| 117 | return " SplitEvaluator that produces results for a classification scheme " |
---|
| 118 | +"on a nominal class attribute, including weighted misclassification " |
---|
| 119 | +"costs."; |
---|
| 120 | } |
---|
| 121 | |
---|
| 122 | /** |
---|
| 123 | * Returns an enumeration describing the available options.. |
---|
| 124 | * |
---|
| 125 | * @return an enumeration of all the available options. |
---|
| 126 | */ |
---|
| 127 | public Enumeration listOptions() { |
---|
| 128 | |
---|
| 129 | Vector newVector = new Vector(1); |
---|
| 130 | Enumeration enu = super.listOptions(); |
---|
| 131 | while (enu.hasMoreElements()) { |
---|
| 132 | newVector.addElement(enu.nextElement()); |
---|
| 133 | } |
---|
| 134 | |
---|
| 135 | newVector.addElement(new Option( |
---|
| 136 | "\tName of a directory to search for cost files when loading\n" |
---|
| 137 | +"\tcosts on demand (default current directory).", |
---|
| 138 | "D", 1, "-D <directory>")); |
---|
| 139 | |
---|
| 140 | return newVector.elements(); |
---|
| 141 | } |
---|
| 142 | |
---|
| 143 | /** |
---|
| 144 | * Parses a given list of options. <p/> |
---|
| 145 | * |
---|
| 146 | <!-- options-start --> |
---|
| 147 | * Valid options are: <p/> |
---|
| 148 | * |
---|
| 149 | * <pre> -W <class name> |
---|
| 150 | * The full class name of the classifier. |
---|
| 151 | * eg: weka.classifiers.bayes.NaiveBayes</pre> |
---|
| 152 | * |
---|
| 153 | * <pre> -C <index> |
---|
| 154 | * The index of the class for which IR statistics |
---|
| 155 | * are to be output. (default 1)</pre> |
---|
| 156 | * |
---|
| 157 | * <pre> -I <index> |
---|
| 158 | * The index of an attribute to output in the |
---|
| 159 | * results. This attribute should identify an |
---|
| 160 | * instance in order to know which instances are |
---|
| 161 | * in the test set of a cross validation. if 0 |
---|
| 162 | * no output (default 0).</pre> |
---|
| 163 | * |
---|
| 164 | * <pre> -P |
---|
| 165 | * Add target and prediction columns to the result |
---|
| 166 | * for each fold.</pre> |
---|
| 167 | * |
---|
| 168 | * <pre> |
---|
| 169 | * Options specific to classifier weka.classifiers.rules.ZeroR: |
---|
| 170 | * </pre> |
---|
| 171 | * |
---|
| 172 | * <pre> -D |
---|
| 173 | * If set, classifier is run in debug mode and |
---|
| 174 | * may output additional info to the console</pre> |
---|
| 175 | * |
---|
| 176 | * <pre> -D <directory> |
---|
| 177 | * Name of a directory to search for cost files when loading |
---|
| 178 | * costs on demand (default current directory).</pre> |
---|
| 179 | * |
---|
| 180 | <!-- options-end --> |
---|
| 181 | * |
---|
| 182 | * All options after -- will be passed to the classifier. |
---|
| 183 | * |
---|
| 184 | * @param options the list of options as an array of strings |
---|
| 185 | * @throws Exception if an option is not supported |
---|
| 186 | */ |
---|
| 187 | public void setOptions(String[] options) throws Exception { |
---|
| 188 | |
---|
| 189 | String demandDir = Utils.getOption('D', options); |
---|
| 190 | if (demandDir.length() != 0) { |
---|
| 191 | setOnDemandDirectory(new File(demandDir)); |
---|
| 192 | } |
---|
| 193 | |
---|
| 194 | super.setOptions(options); |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | /** |
---|
| 198 | * Gets the current settings of the Classifier. |
---|
| 199 | * |
---|
| 200 | * @return an array of strings suitable for passing to setOptions |
---|
| 201 | */ |
---|
| 202 | public String [] getOptions() { |
---|
| 203 | |
---|
| 204 | String [] superOptions = super.getOptions(); |
---|
| 205 | String [] options = new String [superOptions.length + 3]; |
---|
| 206 | int current = 0; |
---|
| 207 | |
---|
| 208 | options[current++] = "-D"; |
---|
| 209 | options[current++] = "" + getOnDemandDirectory(); |
---|
| 210 | |
---|
| 211 | System.arraycopy(superOptions, 0, options, current, |
---|
| 212 | superOptions.length); |
---|
| 213 | current += superOptions.length; |
---|
| 214 | while (current < options.length) { |
---|
| 215 | options[current++] = ""; |
---|
| 216 | } |
---|
| 217 | return options; |
---|
| 218 | } |
---|
| 219 | |
---|
| 220 | /** |
---|
| 221 | * Returns the tip text for this property |
---|
| 222 | * @return tip text for this property suitable for |
---|
| 223 | * displaying in the explorer/experimenter gui |
---|
| 224 | */ |
---|
| 225 | public String onDemandDirectoryTipText() { |
---|
| 226 | return "The directory to look in for cost files. This directory will be " |
---|
| 227 | +"searched for cost files when loading on demand."; |
---|
| 228 | } |
---|
| 229 | |
---|
| 230 | /** |
---|
| 231 | * Returns the directory that will be searched for cost files when |
---|
| 232 | * loading on demand. |
---|
| 233 | * |
---|
| 234 | * @return The cost file search directory. |
---|
| 235 | */ |
---|
| 236 | public File getOnDemandDirectory() { |
---|
| 237 | |
---|
| 238 | return m_OnDemandDirectory; |
---|
| 239 | } |
---|
| 240 | |
---|
| 241 | /** |
---|
| 242 | * Sets the directory that will be searched for cost files when |
---|
| 243 | * loading on demand. |
---|
| 244 | * |
---|
| 245 | * @param newDir The cost file search directory. |
---|
| 246 | */ |
---|
| 247 | public void setOnDemandDirectory(File newDir) { |
---|
| 248 | |
---|
| 249 | if (newDir.isDirectory()) { |
---|
| 250 | m_OnDemandDirectory = newDir; |
---|
| 251 | } else { |
---|
| 252 | m_OnDemandDirectory = new File(newDir.getParent()); |
---|
| 253 | } |
---|
| 254 | } |
---|
| 255 | |
---|
| 256 | /** |
---|
| 257 | * Gets the data types of each of the result columns produced for a |
---|
| 258 | * single run. The number of result fields must be constant |
---|
| 259 | * for a given SplitEvaluator. |
---|
| 260 | * |
---|
| 261 | * @return an array containing objects of the type of each result column. |
---|
| 262 | * The objects should be Strings, or Doubles. |
---|
| 263 | */ |
---|
| 264 | public Object [] getResultTypes() { |
---|
| 265 | int addm = (m_AdditionalMeasures != null) |
---|
| 266 | ? m_AdditionalMeasures.length |
---|
| 267 | : 0; |
---|
| 268 | Object [] resultTypes = new Object[RESULT_SIZE+addm]; |
---|
| 269 | Double doub = new Double(0); |
---|
| 270 | int current = 0; |
---|
| 271 | resultTypes[current++] = doub; |
---|
| 272 | resultTypes[current++] = doub; |
---|
| 273 | |
---|
| 274 | resultTypes[current++] = doub; |
---|
| 275 | resultTypes[current++] = doub; |
---|
| 276 | resultTypes[current++] = doub; |
---|
| 277 | resultTypes[current++] = doub; |
---|
| 278 | resultTypes[current++] = doub; |
---|
| 279 | resultTypes[current++] = doub; |
---|
| 280 | resultTypes[current++] = doub; |
---|
| 281 | resultTypes[current++] = doub; |
---|
| 282 | |
---|
| 283 | resultTypes[current++] = doub; |
---|
| 284 | resultTypes[current++] = doub; |
---|
| 285 | resultTypes[current++] = doub; |
---|
| 286 | resultTypes[current++] = doub; |
---|
| 287 | |
---|
| 288 | resultTypes[current++] = doub; |
---|
| 289 | resultTypes[current++] = doub; |
---|
| 290 | resultTypes[current++] = doub; |
---|
| 291 | resultTypes[current++] = doub; |
---|
| 292 | resultTypes[current++] = doub; |
---|
| 293 | resultTypes[current++] = doub; |
---|
| 294 | |
---|
| 295 | resultTypes[current++] = doub; |
---|
| 296 | resultTypes[current++] = doub; |
---|
| 297 | resultTypes[current++] = doub; |
---|
| 298 | |
---|
| 299 | // Timing stats |
---|
| 300 | resultTypes[current++] = doub; |
---|
| 301 | resultTypes[current++] = doub; |
---|
| 302 | resultTypes[current++] = doub; |
---|
| 303 | resultTypes[current++] = doub; |
---|
| 304 | |
---|
| 305 | // sizes |
---|
| 306 | resultTypes[current++] = doub; |
---|
| 307 | resultTypes[current++] = doub; |
---|
| 308 | resultTypes[current++] = doub; |
---|
| 309 | |
---|
| 310 | resultTypes[current++] = ""; |
---|
| 311 | |
---|
| 312 | // add any additional measures |
---|
| 313 | for (int i=0;i<addm;i++) { |
---|
| 314 | resultTypes[current++] = doub; |
---|
| 315 | } |
---|
| 316 | if (current != RESULT_SIZE+addm) { |
---|
| 317 | throw new Error("ResultTypes didn't fit RESULT_SIZE"); |
---|
| 318 | } |
---|
| 319 | return resultTypes; |
---|
| 320 | } |
---|
| 321 | |
---|
| 322 | /** |
---|
| 323 | * Gets the names of each of the result columns produced for a single run. |
---|
| 324 | * The number of result fields must be constant |
---|
| 325 | * for a given SplitEvaluator. |
---|
| 326 | * |
---|
| 327 | * @return an array containing the name of each result column |
---|
| 328 | */ |
---|
| 329 | public String [] getResultNames() { |
---|
| 330 | int addm = (m_AdditionalMeasures != null) |
---|
| 331 | ? m_AdditionalMeasures.length |
---|
| 332 | : 0; |
---|
| 333 | String [] resultNames = new String[RESULT_SIZE+addm]; |
---|
| 334 | int current = 0; |
---|
| 335 | resultNames[current++] = "Number_of_training_instances"; |
---|
| 336 | resultNames[current++] = "Number_of_testing_instances"; |
---|
| 337 | |
---|
| 338 | // Basic performance stats - right vs wrong |
---|
| 339 | resultNames[current++] = "Number_correct"; |
---|
| 340 | resultNames[current++] = "Number_incorrect"; |
---|
| 341 | resultNames[current++] = "Number_unclassified"; |
---|
| 342 | resultNames[current++] = "Percent_correct"; |
---|
| 343 | resultNames[current++] = "Percent_incorrect"; |
---|
| 344 | resultNames[current++] = "Percent_unclassified"; |
---|
| 345 | resultNames[current++] = "Total_cost"; |
---|
| 346 | resultNames[current++] = "Average_cost"; |
---|
| 347 | |
---|
| 348 | // Sensitive stats - certainty of predictions |
---|
| 349 | resultNames[current++] = "Mean_absolute_error"; |
---|
| 350 | resultNames[current++] = "Root_mean_squared_error"; |
---|
| 351 | resultNames[current++] = "Relative_absolute_error"; |
---|
| 352 | resultNames[current++] = "Root_relative_squared_error"; |
---|
| 353 | |
---|
| 354 | // SF stats |
---|
| 355 | resultNames[current++] = "SF_prior_entropy"; |
---|
| 356 | resultNames[current++] = "SF_scheme_entropy"; |
---|
| 357 | resultNames[current++] = "SF_entropy_gain"; |
---|
| 358 | resultNames[current++] = "SF_mean_prior_entropy"; |
---|
| 359 | resultNames[current++] = "SF_mean_scheme_entropy"; |
---|
| 360 | resultNames[current++] = "SF_mean_entropy_gain"; |
---|
| 361 | |
---|
| 362 | // K&B stats |
---|
| 363 | resultNames[current++] = "KB_information"; |
---|
| 364 | resultNames[current++] = "KB_mean_information"; |
---|
| 365 | resultNames[current++] = "KB_relative_information"; |
---|
| 366 | |
---|
| 367 | // Timing stats |
---|
| 368 | resultNames[current++] = "Elapsed_Time_training"; |
---|
| 369 | resultNames[current++] = "Elapsed_Time_testing"; |
---|
| 370 | resultNames[current++] = "UserCPU_Time_training"; |
---|
| 371 | resultNames[current++] = "UserCPU_Time_testing"; |
---|
| 372 | |
---|
| 373 | // sizes |
---|
| 374 | resultNames[current++] = "Serialized_Model_Size"; |
---|
| 375 | resultNames[current++] = "Serialized_Train_Set_Size"; |
---|
| 376 | resultNames[current++] = "Serialized_Test_Set_Size"; |
---|
| 377 | |
---|
| 378 | // Classifier defined extras |
---|
| 379 | resultNames[current++] = "Summary"; |
---|
| 380 | // add any additional measures |
---|
| 381 | for (int i=0;i<addm;i++) { |
---|
| 382 | resultNames[current++] = m_AdditionalMeasures[i]; |
---|
| 383 | } |
---|
| 384 | if (current != RESULT_SIZE+addm) { |
---|
| 385 | throw new Error("ResultNames didn't fit RESULT_SIZE"); |
---|
| 386 | } |
---|
| 387 | return resultNames; |
---|
| 388 | } |
---|
| 389 | |
---|
| 390 | /** |
---|
| 391 | * Gets the results for the supplied train and test datasets. Now performs |
---|
| 392 | * a deep copy of the classifier before it is built and evaluated (just in case |
---|
| 393 | * the classifier is not initialized properly in buildClassifier()). |
---|
| 394 | * |
---|
| 395 | * @param train the training Instances. |
---|
| 396 | * @param test the testing Instances. |
---|
| 397 | * @return the results stored in an array. The objects stored in |
---|
| 398 | * the array may be Strings, Doubles, or null (for the missing value). |
---|
| 399 | * @throws Exception if a problem occurs while getting the results |
---|
| 400 | */ |
---|
| 401 | public Object [] getResult(Instances train, Instances test) |
---|
| 402 | throws Exception { |
---|
| 403 | |
---|
| 404 | if (train.classAttribute().type() != Attribute.NOMINAL) { |
---|
| 405 | throw new Exception("Class attribute is not nominal!"); |
---|
| 406 | } |
---|
| 407 | if (m_Template == null) { |
---|
| 408 | throw new Exception("No classifier has been specified"); |
---|
| 409 | } |
---|
| 410 | ThreadMXBean thMonitor = ManagementFactory.getThreadMXBean(); |
---|
| 411 | boolean canMeasureCPUTime = thMonitor.isThreadCpuTimeSupported(); |
---|
| 412 | if(!thMonitor.isThreadCpuTimeEnabled()) |
---|
| 413 | thMonitor.setThreadCpuTimeEnabled(true); |
---|
| 414 | |
---|
| 415 | int addm = (m_AdditionalMeasures != null) ? m_AdditionalMeasures.length : 0; |
---|
| 416 | Object [] result = new Object[RESULT_SIZE+addm]; |
---|
| 417 | long thID = Thread.currentThread().getId(); |
---|
| 418 | long CPUStartTime=-1, trainCPUTimeElapsed=-1, testCPUTimeElapsed=-1, |
---|
| 419 | trainTimeStart, trainTimeElapsed, testTimeStart, testTimeElapsed; |
---|
| 420 | |
---|
| 421 | String costName = train.relationName() + CostMatrix.FILE_EXTENSION; |
---|
| 422 | File costFile = new File(getOnDemandDirectory(), costName); |
---|
| 423 | if (!costFile.exists()) { |
---|
| 424 | throw new Exception("On-demand cost file doesn't exist: " + costFile); |
---|
| 425 | } |
---|
| 426 | CostMatrix costMatrix = new CostMatrix(new BufferedReader( |
---|
| 427 | new FileReader(costFile))); |
---|
| 428 | |
---|
| 429 | Evaluation eval = new Evaluation(train, costMatrix); |
---|
| 430 | m_Classifier = AbstractClassifier.makeCopy(m_Template); |
---|
| 431 | |
---|
| 432 | trainTimeStart = System.currentTimeMillis(); |
---|
| 433 | if(canMeasureCPUTime) |
---|
| 434 | CPUStartTime = thMonitor.getThreadUserTime(thID); |
---|
| 435 | m_Classifier.buildClassifier(train); |
---|
| 436 | if(canMeasureCPUTime) |
---|
| 437 | trainCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime; |
---|
| 438 | trainTimeElapsed = System.currentTimeMillis() - trainTimeStart; |
---|
| 439 | testTimeStart = System.currentTimeMillis(); |
---|
| 440 | if(canMeasureCPUTime) |
---|
| 441 | CPUStartTime = thMonitor.getThreadUserTime(thID); |
---|
| 442 | eval.evaluateModel(m_Classifier, test); |
---|
| 443 | if(canMeasureCPUTime) |
---|
| 444 | testCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime; |
---|
| 445 | testTimeElapsed = System.currentTimeMillis() - testTimeStart; |
---|
| 446 | thMonitor = null; |
---|
| 447 | |
---|
| 448 | m_result = eval.toSummaryString(); |
---|
| 449 | // The results stored are all per instance -- can be multiplied by the |
---|
| 450 | // number of instances to get absolute numbers |
---|
| 451 | int current = 0; |
---|
| 452 | result[current++] = new Double(train.numInstances()); |
---|
| 453 | result[current++] = new Double(eval.numInstances()); |
---|
| 454 | |
---|
| 455 | result[current++] = new Double(eval.correct()); |
---|
| 456 | result[current++] = new Double(eval.incorrect()); |
---|
| 457 | result[current++] = new Double(eval.unclassified()); |
---|
| 458 | result[current++] = new Double(eval.pctCorrect()); |
---|
| 459 | result[current++] = new Double(eval.pctIncorrect()); |
---|
| 460 | result[current++] = new Double(eval.pctUnclassified()); |
---|
| 461 | result[current++] = new Double(eval.totalCost()); |
---|
| 462 | result[current++] = new Double(eval.avgCost()); |
---|
| 463 | |
---|
| 464 | result[current++] = new Double(eval.meanAbsoluteError()); |
---|
| 465 | result[current++] = new Double(eval.rootMeanSquaredError()); |
---|
| 466 | result[current++] = new Double(eval.relativeAbsoluteError()); |
---|
| 467 | result[current++] = new Double(eval.rootRelativeSquaredError()); |
---|
| 468 | |
---|
| 469 | result[current++] = new Double(eval.SFPriorEntropy()); |
---|
| 470 | result[current++] = new Double(eval.SFSchemeEntropy()); |
---|
| 471 | result[current++] = new Double(eval.SFEntropyGain()); |
---|
| 472 | result[current++] = new Double(eval.SFMeanPriorEntropy()); |
---|
| 473 | result[current++] = new Double(eval.SFMeanSchemeEntropy()); |
---|
| 474 | result[current++] = new Double(eval.SFMeanEntropyGain()); |
---|
| 475 | |
---|
| 476 | // K&B stats |
---|
| 477 | result[current++] = new Double(eval.KBInformation()); |
---|
| 478 | result[current++] = new Double(eval.KBMeanInformation()); |
---|
| 479 | result[current++] = new Double(eval.KBRelativeInformation()); |
---|
| 480 | |
---|
| 481 | // Timing stats |
---|
| 482 | result[current++] = new Double(trainTimeElapsed / 1000.0); |
---|
| 483 | result[current++] = new Double(testTimeElapsed / 1000.0); |
---|
| 484 | if(canMeasureCPUTime) { |
---|
| 485 | result[current++] = new Double((trainCPUTimeElapsed/1000000.0) / 1000.0); |
---|
| 486 | result[current++] = new Double((testCPUTimeElapsed /1000000.0) / 1000.0); |
---|
| 487 | } |
---|
| 488 | else { |
---|
| 489 | result[current++] = new Double(Utils.missingValue()); |
---|
| 490 | result[current++] = new Double(Utils.missingValue()); |
---|
| 491 | } |
---|
| 492 | |
---|
| 493 | // sizes |
---|
| 494 | ByteArrayOutputStream bastream = new ByteArrayOutputStream(); |
---|
| 495 | ObjectOutputStream oostream = new ObjectOutputStream(bastream); |
---|
| 496 | oostream.writeObject(m_Classifier); |
---|
| 497 | result[current++] = new Double(bastream.size()); |
---|
| 498 | bastream = new ByteArrayOutputStream(); |
---|
| 499 | oostream = new ObjectOutputStream(bastream); |
---|
| 500 | oostream.writeObject(train); |
---|
| 501 | result[current++] = new Double(bastream.size()); |
---|
| 502 | bastream = new ByteArrayOutputStream(); |
---|
| 503 | oostream = new ObjectOutputStream(bastream); |
---|
| 504 | oostream.writeObject(test); |
---|
| 505 | result[current++] = new Double(bastream.size()); |
---|
| 506 | |
---|
| 507 | if (m_Classifier instanceof Summarizable) { |
---|
| 508 | result[current++] = ((Summarizable)m_Classifier).toSummaryString(); |
---|
| 509 | } else { |
---|
| 510 | result[current++] = null; |
---|
| 511 | } |
---|
| 512 | |
---|
| 513 | for (int i=0;i<addm;i++) { |
---|
| 514 | if (m_doesProduce[i]) { |
---|
| 515 | try { |
---|
| 516 | double dv = ((AdditionalMeasureProducer)m_Classifier). |
---|
| 517 | getMeasure(m_AdditionalMeasures[i]); |
---|
| 518 | if (!Utils.isMissingValue(dv)) { |
---|
| 519 | Double value = new Double(dv); |
---|
| 520 | result[current++] = value; |
---|
| 521 | } else { |
---|
| 522 | result[current++] = null; |
---|
| 523 | } |
---|
| 524 | } catch (Exception ex) { |
---|
| 525 | System.err.println(ex); |
---|
| 526 | } |
---|
| 527 | } else { |
---|
| 528 | result[current++] = null; |
---|
| 529 | } |
---|
| 530 | } |
---|
| 531 | |
---|
| 532 | if (current != RESULT_SIZE+addm) { |
---|
| 533 | throw new Error("Results didn't fit RESULT_SIZE"); |
---|
| 534 | } |
---|
| 535 | return result; |
---|
| 536 | } |
---|
| 537 | |
---|
| 538 | /** |
---|
| 539 | * Returns a text description of the split evaluator. |
---|
| 540 | * |
---|
| 541 | * @return a text description of the split evaluator. |
---|
| 542 | */ |
---|
| 543 | public String toString() { |
---|
| 544 | |
---|
| 545 | String result = "CostSensitiveClassifierSplitEvaluator: "; |
---|
| 546 | if (m_Template == null) { |
---|
| 547 | return result + "<null> classifier"; |
---|
| 548 | } |
---|
| 549 | return result + m_Template.getClass().getName() + " " |
---|
| 550 | + m_ClassifierOptions + "(version " + m_ClassifierVersion + ")"; |
---|
| 551 | } |
---|
| 552 | |
---|
| 553 | /** |
---|
| 554 | * Returns the revision string. |
---|
| 555 | * |
---|
| 556 | * @return the revision |
---|
| 557 | */ |
---|
| 558 | public String getRevision() { |
---|
| 559 | return RevisionUtils.extract("$Revision: 5987 $"); |
---|
| 560 | } |
---|
| 561 | } // CostSensitiveClassifierSplitEvaluator |
---|