Context Navigation

source: src/main/java/weka/experiment/ClassifierSplitEvaluator.java @ 13

Last change on this file since 13 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 35.7 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* ClassifierSplitEvaluator.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23
	24	package weka.experiment;
	25
	26	import weka.classifiers.Classifier;
	27	import weka.classifiers.AbstractClassifier;
	28	import weka.classifiers.Evaluation;
	29	import weka.classifiers.rules.ZeroR;
	30	import weka.core.AdditionalMeasureProducer;
	31	import weka.core.Attribute;
	32	import weka.core.Instance;
	33	import weka.core.Instances;
	34	import weka.core.Option;
	35	import weka.core.OptionHandler;
	36	import weka.core.RevisionHandler;
	37	import weka.core.RevisionUtils;
	38	import weka.core.Summarizable;
	39	import weka.core.Utils;
	40
	41	import java.io.ByteArrayOutputStream;
	42	import java.io.ObjectOutputStream;
	43	import java.io.ObjectStreamClass;
	44	import java.io.Serializable;
	45	import java.lang.management.ManagementFactory;
	46	import java.lang.management.ThreadMXBean;
	47	import java.util.Enumeration;
	48	import java.util.Vector;
	49
	50
	51	/**
	52	<!-- globalinfo-start -->
	53	* A SplitEvaluator that produces results for a classification scheme on a nominal class attribute.
	54	* <p/>
	55	<!-- globalinfo-end -->
	56	*
	57	<!-- options-start -->
	58	* Valid options are: <p/>
	59	*
	60	* <pre> -W <class name>
	61	* The full class name of the classifier.
	62	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	63	*
	64	* <pre> -C <index>
	65	* The index of the class for which IR statistics
	66	* are to be output. (default 1)</pre>
	67	*
	68	* <pre> -I <index>
	69	* The index of an attribute to output in the
	70	* results. This attribute should identify an
	71	* instance in order to know which instances are
	72	* in the test set of a cross validation. if 0
	73	* no output (default 0).</pre>
	74	*
	75	* <pre> -P
	76	* Add target and prediction columns to the result
	77	* for each fold.</pre>
	78	*
	79	* <pre>
	80	* Options specific to classifier weka.classifiers.rules.ZeroR:
	81	* </pre>
	82	*
	83	* <pre> -D
	84	* If set, classifier is run in debug mode and
	85	* may output additional info to the console</pre>
	86	*
	87	<!-- options-end -->
	88	*
	89	* All options after -- will be passed to the classifier.
	90	*
	91	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	92	* @version $Revision: 5987 $
	93	*/
	94	public class ClassifierSplitEvaluator
	95	implements SplitEvaluator, OptionHandler, AdditionalMeasureProducer,
	96	RevisionHandler {
	97
	98	/** for serialization */
	99	static final long serialVersionUID = -8511241602760467265L;
	100
	101	/** The template classifier */
	102	protected Classifier m_Template = new ZeroR();
	103
	104	/** The classifier used for evaluation */
	105	protected Classifier m_Classifier;
	106
	107	/** The names of any additional measures to look for in SplitEvaluators */
	108	protected String [] m_AdditionalMeasures = null;
	109
	110	/** Array of booleans corresponding to the measures in m_AdditionalMeasures
	111	indicating which of the AdditionalMeasures the current classifier
	112	can produce */
	113	protected boolean [] m_doesProduce = null;
	114
	115	/** The number of additional measures that need to be filled in
	116	after taking into account column constraints imposed by the final
	117	destination for results */
	118	protected int m_numberAdditionalMeasures = 0;
	119
	120	/** Holds the statistics for the most recent application of the classifier */
	121	protected String m_result = null;
	122
	123	/** The classifier options (if any) */
	124	protected String m_ClassifierOptions = "";
	125
	126	/** The classifier version */
	127	protected String m_ClassifierVersion = "";
	128
	129	/** The length of a key */
	130	private static final int KEY_SIZE = 3;
	131
	132	/** The length of a result */
	133	private static final int RESULT_SIZE = 30;
	134
	135	/** The number of IR statistics */
	136	private static final int NUM_IR_STATISTICS = 14;
	137
	138	/** The number of averaged IR statistics */
	139	private static final int NUM_WEIGHTED_IR_STATISTICS = 8;
	140
	141	/** The number of unweighted averaged IR statistics */
	142	private static final int NUM_UNWEIGHTED_IR_STATISTICS = 2;
	143
	144	/** Class index for information retrieval statistics (default 0) */
	145	private int m_IRclass = 0;
	146
	147	/** Flag for prediction and target columns output.*/
	148	private boolean m_predTargetColumn = false;
	149
	150	/** Attribute index of instance identifier (default -1) */
	151	private int m_attID = -1;
	152
	153	/**
	154	* No args constructor.
	155	*/
	156	public ClassifierSplitEvaluator() {
	157
	158	updateOptions();
	159	}
	160
	161	/**
	162	* Returns a string describing this split evaluator
	163	* @return a description of the split evaluator suitable for
	164	* displaying in the explorer/experimenter gui
	165	*/
	166	public String globalInfo() {
	167	return " A SplitEvaluator that produces results for a classification "
	168	+"scheme on a nominal class attribute.";
	169	}
	170
	171	/**
	172	* Returns an enumeration describing the available options..
	173	*
	174	* @return an enumeration of all the available options.
	175	*/
	176	public Enumeration listOptions() {
	177
	178	Vector newVector = new Vector(4);
	179
	180	newVector.addElement(new Option(
	181	"\tThe full class name of the classifier.\n"
	182	+"\teg: weka.classifiers.bayes.NaiveBayes",
	183	"W", 1,
	184	"-W <class name>"));
	185	newVector.addElement(new Option(
	186	"\tThe index of the class for which IR statistics\n" +
	187	"\tare to be output. (default 1)",
	188	"C", 1,
	189	"-C <index>"));
	190	newVector.addElement(new Option(
	191	"\tThe index of an attribute to output in the\n" +
	192	"\tresults. This attribute should identify an\n" +
	193	"\tinstance in order to know which instances are\n" +
	194	"\tin the test set of a cross validation. if 0\n" +
	195	"\tno output (default 0).",
	196	"I", 1,
	197	"-I <index>"));
	198	newVector.addElement(new Option(
	199	"\tAdd target and prediction columns to the result\n" +
	200	"\tfor each fold.",
	201	"P", 0,
	202	"-P"));
	203
	204	if ((m_Template != null) &&
	205	(m_Template instanceof OptionHandler)) {
	206	newVector.addElement(new Option(
	207	"",
	208	"", 0, "\nOptions specific to classifier "
	209	+ m_Template.getClass().getName() + ":"));
	210	Enumeration enu = ((OptionHandler)m_Template).listOptions();
	211	while (enu.hasMoreElements()) {
	212	newVector.addElement(enu.nextElement());
	213	}
	214	}
	215	return newVector.elements();
	216	}
	217
	218	/**
	219	* Parses a given list of options. <p/>
	220	*
	221	<!-- options-start -->
	222	* Valid options are: <p/>
	223	*
	224	* <pre> -W <class name>
	225	* The full class name of the classifier.
	226	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	227	*
	228	* <pre> -C <index>
	229	* The index of the class for which IR statistics
	230	* are to be output. (default 1)</pre>
	231	*
	232	* <pre> -I <index>
	233	* The index of an attribute to output in the
	234	* results. This attribute should identify an
	235	* instance in order to know which instances are
	236	* in the test set of a cross validation. if 0
	237	* no output (default 0).</pre>
	238	*
	239	* <pre> -P
	240	* Add target and prediction columns to the result
	241	* for each fold.</pre>
	242	*
	243	* <pre>
	244	* Options specific to classifier weka.classifiers.rules.ZeroR:
	245	* </pre>
	246	*
	247	* <pre> -D
	248	* If set, classifier is run in debug mode and
	249	* may output additional info to the console</pre>
	250	*
	251	<!-- options-end -->
	252	*
	253	* All options after -- will be passed to the classifier.
	254	*
	255	* @param options the list of options as an array of strings
	256	* @throws Exception if an option is not supported
	257	*/
	258	public void setOptions(String[] options) throws Exception {
	259
	260	String cName = Utils.getOption('W', options);
	261	if (cName.length() == 0) {
	262	throw new Exception("A classifier must be specified with"
	263	+ " the -W option.");
	264	}
	265	// Do it first without options, so if an exception is thrown during
	266	// the option setting, listOptions will contain options for the actual
	267	// Classifier.
	268	setClassifier(AbstractClassifier.forName(cName, null));
	269	if (getClassifier() instanceof OptionHandler) {
	270	((OptionHandler) getClassifier())
	271	.setOptions(Utils.partitionOptions(options));
	272	updateOptions();
	273	}
	274
	275	String indexName = Utils.getOption('C', options);
	276	if (indexName.length() != 0) {
	277	m_IRclass = (new Integer(indexName)).intValue() - 1;
	278	} else {
	279	m_IRclass = 0;
	280	}
	281
	282	String attID = Utils.getOption('I', options);
	283	if (attID.length() != 0) {
	284	m_attID = (new Integer(attID)).intValue() - 1;
	285	} else {
	286	m_attID = -1;
	287	}
	288
	289	m_predTargetColumn = Utils.getFlag('P', options);
	290	}
	291
	292	/**
	293	* Gets the current settings of the Classifier.
	294	*
	295	* @return an array of strings suitable for passing to setOptions
	296	*/
	297	public String [] getOptions() {
	298
	299	String [] classifierOptions = new String [0];
	300	if ((m_Template != null) &&
	301	(m_Template instanceof OptionHandler)) {
	302	classifierOptions = ((OptionHandler)m_Template).getOptions();
	303	}
	304
	305	String [] options = new String [classifierOptions.length + 8];
	306	int current = 0;
	307
	308	if (getClassifier() != null) {
	309	options[current++] = "-W";
	310	options[current++] = getClassifier().getClass().getName();
	311	}
	312	options[current++] = "-I";
	313	options[current++] = "" + (m_attID + 1);
	314
	315	if (getPredTargetColumn()) options[current++] = "-P";
	316
	317	options[current++] = "-C";
	318	options[current++] = "" + (m_IRclass + 1);
	319	options[current++] = "--";
	320
	321	System.arraycopy(classifierOptions, 0, options, current,
	322	classifierOptions.length);
	323	current += classifierOptions.length;
	324	while (current < options.length) {
	325	options[current++] = "";
	326	}
	327	return options;
	328	}
	329
	330	/**
	331	* Set a list of method names for additional measures to look for
	332	* in Classifiers. This could contain many measures (of which only a
	333	* subset may be produceable by the current Classifier) if an experiment
	334	* is the type that iterates over a set of properties.
	335	* @param additionalMeasures a list of method names
	336	*/
	337	public void setAdditionalMeasures(String [] additionalMeasures) {
	338	// System.err.println("ClassifierSplitEvaluator: setting additional measures");
	339	m_AdditionalMeasures = additionalMeasures;
	340
	341	// determine which (if any) of the additional measures this classifier
	342	// can produce
	343	if (m_AdditionalMeasures != null && m_AdditionalMeasures.length > 0) {
	344	m_doesProduce = new boolean [m_AdditionalMeasures.length];
	345
	346	if (m_Template instanceof AdditionalMeasureProducer) {
	347	Enumeration en = ((AdditionalMeasureProducer)m_Template).
	348	enumerateMeasures();
	349	while (en.hasMoreElements()) {
	350	String mname = (String)en.nextElement();
	351	for (int j=0;j<m_AdditionalMeasures.length;j++) {
	352	if (mname.compareToIgnoreCase(m_AdditionalMeasures[j]) == 0) {
	353	m_doesProduce[j] = true;
	354	}
	355	}
	356	}
	357	}
	358	} else {
	359	m_doesProduce = null;
	360	}
	361	}
	362
	363	/**
	364	* Returns an enumeration of any additional measure names that might be
	365	* in the classifier
	366	* @return an enumeration of the measure names
	367	*/
	368	public Enumeration enumerateMeasures() {
	369	Vector newVector = new Vector();
	370	if (m_Template instanceof AdditionalMeasureProducer) {
	371	Enumeration en = ((AdditionalMeasureProducer)m_Template).
	372	enumerateMeasures();
	373	while (en.hasMoreElements()) {
	374	String mname = (String)en.nextElement();
	375	newVector.addElement(mname);
	376	}
	377	}
	378	return newVector.elements();
	379	}
	380
	381	/**
	382	* Returns the value of the named measure
	383	* @param additionalMeasureName the name of the measure to query for its value
	384	* @return the value of the named measure
	385	* @throws IllegalArgumentException if the named measure is not supported
	386	*/
	387	public double getMeasure(String additionalMeasureName) {
	388	if (m_Template instanceof AdditionalMeasureProducer) {
	389	if (m_Classifier == null) {
	390	throw new IllegalArgumentException("ClassifierSplitEvaluator: " +
	391	"Can't return result for measure, " +
	392	"classifier has not been built yet.");
	393	}
	394	return ((AdditionalMeasureProducer)m_Classifier).
	395	getMeasure(additionalMeasureName);
	396	} else {
	397	throw new IllegalArgumentException("ClassifierSplitEvaluator: "
	398	+"Can't return value for : "+additionalMeasureName
	399	+". "+m_Template.getClass().getName()+" "
	400	+"is not an AdditionalMeasureProducer");
	401	}
	402	}
	403
	404	/**
	405	* Gets the data types of each of the key columns produced for a single run.
	406	* The number of key fields must be constant
	407	* for a given SplitEvaluator.
	408	*
	409	* @return an array containing objects of the type of each key column. The
	410	* objects should be Strings, or Doubles.
	411	*/
	412	public Object [] getKeyTypes() {
	413
	414	Object [] keyTypes = new Object[KEY_SIZE];
	415	keyTypes[0] = "";
	416	keyTypes[1] = "";
	417	keyTypes[2] = "";
	418	return keyTypes;
	419	}
	420
	421	/**
	422	* Gets the names of each of the key columns produced for a single run.
	423	* The number of key fields must be constant
	424	* for a given SplitEvaluator.
	425	*
	426	* @return an array containing the name of each key column
	427	*/
	428	public String [] getKeyNames() {
	429
	430	String [] keyNames = new String[KEY_SIZE];
	431	keyNames[0] = "Scheme";
	432	keyNames[1] = "Scheme_options";
	433	keyNames[2] = "Scheme_version_ID";
	434	return keyNames;
	435	}
	436
	437	/**
	438	* Gets the key describing the current SplitEvaluator. For example
	439	* This may contain the name of the classifier used for classifier
	440	* predictive evaluation. The number of key fields must be constant
	441	* for a given SplitEvaluator.
	442	*
	443	* @return an array of objects containing the key.
	444	*/
	445	public Object [] getKey(){
	446
	447	Object [] key = new Object[KEY_SIZE];
	448	key[0] = m_Template.getClass().getName();
	449	key[1] = m_ClassifierOptions;
	450	key[2] = m_ClassifierVersion;
	451	return key;
	452	}
	453
	454	/**
	455	* Gets the data types of each of the result columns produced for a
	456	* single run. The number of result fields must be constant
	457	* for a given SplitEvaluator.
	458	*
	459	* @return an array containing objects of the type of each result column.
	460	* The objects should be Strings, or Doubles.
	461	*/
	462	public Object [] getResultTypes() {
	463	int addm = (m_AdditionalMeasures != null)
	464	? m_AdditionalMeasures.length
	465	: 0;
	466	int overall_length = RESULT_SIZE+addm;
	467	overall_length += NUM_IR_STATISTICS;
	468	overall_length += NUM_WEIGHTED_IR_STATISTICS;
	469	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
	470	if (getAttributeID() >= 0) overall_length += 1;
	471	if (getPredTargetColumn()) overall_length += 2;
	472	Object [] resultTypes = new Object[overall_length];
	473	Double doub = new Double(0);
	474	int current = 0;
	475	resultTypes[current++] = doub;
	476	resultTypes[current++] = doub;
	477
	478	resultTypes[current++] = doub;
	479	resultTypes[current++] = doub;
	480	resultTypes[current++] = doub;
	481	resultTypes[current++] = doub;
	482	resultTypes[current++] = doub;
	483	resultTypes[current++] = doub;
	484
	485	resultTypes[current++] = doub;
	486	resultTypes[current++] = doub;
	487	resultTypes[current++] = doub;
	488	resultTypes[current++] = doub;
	489
	490	resultTypes[current++] = doub;
	491	resultTypes[current++] = doub;
	492	resultTypes[current++] = doub;
	493	resultTypes[current++] = doub;
	494	resultTypes[current++] = doub;
	495	resultTypes[current++] = doub;
	496
	497	resultTypes[current++] = doub;
	498	resultTypes[current++] = doub;
	499	resultTypes[current++] = doub;
	500	resultTypes[current++] = doub;
	501
	502	// IR stats
	503	resultTypes[current++] = doub;
	504	resultTypes[current++] = doub;
	505	resultTypes[current++] = doub;
	506	resultTypes[current++] = doub;
	507	resultTypes[current++] = doub;
	508	resultTypes[current++] = doub;
	509	resultTypes[current++] = doub;
	510	resultTypes[current++] = doub;
	511	resultTypes[current++] = doub;
	512	resultTypes[current++] = doub;
	513	resultTypes[current++] = doub;
	514	resultTypes[current++] = doub;
	515
	516	// Unweighted IR stats
	517	resultTypes[current++] = doub;
	518	resultTypes[current++] = doub;
	519
	520	// Weighted IR stats
	521	resultTypes[current++] = doub;
	522	resultTypes[current++] = doub;
	523	resultTypes[current++] = doub;
	524	resultTypes[current++] = doub;
	525	resultTypes[current++] = doub;
	526	resultTypes[current++] = doub;
	527	resultTypes[current++] = doub;
	528	resultTypes[current++] = doub;
	529
	530	// Timing stats
	531	resultTypes[current++] = doub;
	532	resultTypes[current++] = doub;
	533	resultTypes[current++] = doub;
	534	resultTypes[current++] = doub;
	535
	536	// sizes
	537	resultTypes[current++] = doub;
	538	resultTypes[current++] = doub;
	539	resultTypes[current++] = doub;
	540
	541	// Prediction interval statistics
	542	resultTypes[current++] = doub;
	543	resultTypes[current++] = doub;
	544
	545	// ID/Targets/Predictions
	546	if (getAttributeID() >= 0) resultTypes[current++] = "";
	547	if (getPredTargetColumn()){
	548	resultTypes[current++] = "";
	549	resultTypes[current++] = "";
	550	}
	551
	552	// Classifier defined extras
	553	resultTypes[current++] = "";
	554
	555	// add any additional measures
	556	for (int i=0;i<addm;i++) {
	557	resultTypes[current++] = doub;
	558	}
	559	if (current != overall_length) {
	560	throw new Error("ResultTypes didn't fit RESULT_SIZE");
	561	}
	562	return resultTypes;
	563	}
	564
	565	/**
	566	* Gets the names of each of the result columns produced for a single run.
	567	* The number of result fields must be constant
	568	* for a given SplitEvaluator.
	569	*
	570	* @return an array containing the name of each result column
	571	*/
	572	public String [] getResultNames() {
	573	int addm = (m_AdditionalMeasures != null)
	574	? m_AdditionalMeasures.length
	575	: 0;
	576	int overall_length = RESULT_SIZE+addm;
	577	overall_length += NUM_IR_STATISTICS;
	578	overall_length += NUM_WEIGHTED_IR_STATISTICS;
	579	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
	580	if (getAttributeID() >= 0) overall_length += 1;
	581	if (getPredTargetColumn()) overall_length += 2;
	582
	583	String [] resultNames = new String[overall_length];
	584	int current = 0;
	585	resultNames[current++] = "Number_of_training_instances";
	586	resultNames[current++] = "Number_of_testing_instances";
	587
	588	// Basic performance stats - right vs wrong
	589	resultNames[current++] = "Number_correct";
	590	resultNames[current++] = "Number_incorrect";
	591	resultNames[current++] = "Number_unclassified";
	592	resultNames[current++] = "Percent_correct";
	593	resultNames[current++] = "Percent_incorrect";
	594	resultNames[current++] = "Percent_unclassified";
	595	resultNames[current++] = "Kappa_statistic";
	596
	597	// Sensitive stats - certainty of predictions
	598	resultNames[current++] = "Mean_absolute_error";
	599	resultNames[current++] = "Root_mean_squared_error";
	600	resultNames[current++] = "Relative_absolute_error";
	601	resultNames[current++] = "Root_relative_squared_error";
	602
	603	// SF stats
	604	resultNames[current++] = "SF_prior_entropy";
	605	resultNames[current++] = "SF_scheme_entropy";
	606	resultNames[current++] = "SF_entropy_gain";
	607	resultNames[current++] = "SF_mean_prior_entropy";
	608	resultNames[current++] = "SF_mean_scheme_entropy";
	609	resultNames[current++] = "SF_mean_entropy_gain";
	610
	611	// K&B stats
	612	resultNames[current++] = "KB_information";
	613	resultNames[current++] = "KB_mean_information";
	614	resultNames[current++] = "KB_relative_information";
	615
	616	// IR stats
	617	resultNames[current++] = "True_positive_rate";
	618	resultNames[current++] = "Num_true_positives";
	619	resultNames[current++] = "False_positive_rate";
	620	resultNames[current++] = "Num_false_positives";
	621	resultNames[current++] = "True_negative_rate";
	622	resultNames[current++] = "Num_true_negatives";
	623	resultNames[current++] = "False_negative_rate";
	624	resultNames[current++] = "Num_false_negatives";
	625	resultNames[current++] = "IR_precision";
	626	resultNames[current++] = "IR_recall";
	627	resultNames[current++] = "F_measure";
	628	resultNames[current++] = "Area_under_ROC";
	629
	630	// Weighted IR stats
	631	resultNames[current++] = "Weighted_avg_true_positive_rate";
	632	resultNames[current++] = "Weighted_avg_false_positive_rate";
	633	resultNames[current++] = "Weighted_avg_true_negative_rate";
	634	resultNames[current++] = "Weighted_avg_false_negative_rate";
	635	resultNames[current++] = "Weighted_avg_IR_precision";
	636	resultNames[current++] = "Weighted_avg_IR_recall";
	637	resultNames[current++] = "Weighted_avg_F_measure";
	638	resultNames[current++] = "Weighted_avg_area_under_ROC";
	639
	640	// Unweighted IR stats
	641	resultNames[current++] = "Unweighted_macro_avg_F_measure";
	642	resultNames[current++] = "Unweighted_micro_avg_F_measure";
	643
	644	// Timing stats
	645	resultNames[current++] = "Elapsed_Time_training";
	646	resultNames[current++] = "Elapsed_Time_testing";
	647	resultNames[current++] = "UserCPU_Time_training";
	648	resultNames[current++] = "UserCPU_Time_testing";
	649
	650	// sizes
	651	resultNames[current++] = "Serialized_Model_Size";
	652	resultNames[current++] = "Serialized_Train_Set_Size";
	653	resultNames[current++] = "Serialized_Test_Set_Size";
	654
	655	// Prediction interval statistics
	656	resultNames[current++] = "Coverage_of_Test_Cases_By_Regions";
	657	resultNames[current++] = "Size_of_Predicted_Regions";
	658
	659	// ID/Targets/Predictions
	660	if (getAttributeID() >= 0) resultNames[current++] = "Instance_ID";
	661	if (getPredTargetColumn()){
	662	resultNames[current++] = "Targets";
	663	resultNames[current++] = "Predictions";
	664	}
	665
	666	// Classifier defined extras
	667	resultNames[current++] = "Summary";
	668	// add any additional measures
	669	for (int i=0;i<addm;i++) {
	670	resultNames[current++] = m_AdditionalMeasures[i];
	671	}
	672	if (current != overall_length) {
	673	throw new Error("ResultNames didn't fit RESULT_SIZE");
	674	}
	675	return resultNames;
	676	}
	677
	678	/**
	679	* Gets the results for the supplied train and test datasets. Now performs
	680	* a deep copy of the classifier before it is built and evaluated (just in case
	681	* the classifier is not initialized properly in buildClassifier()).
	682	*
	683	* @param train the training Instances.
	684	* @param test the testing Instances.
	685	* @return the results stored in an array. The objects stored in
	686	* the array may be Strings, Doubles, or null (for the missing value).
	687	* @throws Exception if a problem occurs while getting the results
	688	*/
	689	public Object [] getResult(Instances train, Instances test)
	690	throws Exception {
	691
	692	if (train.classAttribute().type() != Attribute.NOMINAL) {
	693	throw new Exception("Class attribute is not nominal!");
	694	}
	695	if (m_Template == null) {
	696	throw new Exception("No classifier has been specified");
	697	}
	698	int addm = (m_AdditionalMeasures != null) ? m_AdditionalMeasures.length : 0;
	699	int overall_length = RESULT_SIZE+addm;
	700	overall_length += NUM_IR_STATISTICS;
	701	overall_length += NUM_WEIGHTED_IR_STATISTICS;
	702	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
	703	if (getAttributeID() >= 0) overall_length += 1;
	704	if (getPredTargetColumn()) overall_length += 2;
	705
	706	ThreadMXBean thMonitor = ManagementFactory.getThreadMXBean();
	707	boolean canMeasureCPUTime = thMonitor.isThreadCpuTimeSupported();
	708	if(!thMonitor.isThreadCpuTimeEnabled())
	709	thMonitor.setThreadCpuTimeEnabled(true);
	710
	711	Object [] result = new Object[overall_length];
	712	Evaluation eval = new Evaluation(train);
	713	m_Classifier = AbstractClassifier.makeCopy(m_Template);
	714	double [] predictions;
	715	long thID = Thread.currentThread().getId();
	716	long CPUStartTime=-1, trainCPUTimeElapsed=-1, testCPUTimeElapsed=-1,
	717	trainTimeStart, trainTimeElapsed, testTimeStart, testTimeElapsed;
	718
	719	//training classifier
	720	trainTimeStart = System.currentTimeMillis();
	721	if(canMeasureCPUTime)
	722	CPUStartTime = thMonitor.getThreadUserTime(thID);
	723	m_Classifier.buildClassifier(train);
	724	if(canMeasureCPUTime)
	725	trainCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime;
	726	trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
	727
	728	//testing classifier
	729	testTimeStart = System.currentTimeMillis();
	730	if(canMeasureCPUTime)
	731	CPUStartTime = thMonitor.getThreadUserTime(thID);
	732	predictions = eval.evaluateModel(m_Classifier, test);
	733	if(canMeasureCPUTime)
	734	testCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime;
	735	testTimeElapsed = System.currentTimeMillis() - testTimeStart;
	736	thMonitor = null;
	737
	738	m_result = eval.toSummaryString();
	739	// The results stored are all per instance -- can be multiplied by the
	740	// number of instances to get absolute numbers
	741	int current = 0;
	742	result[current++] = new Double(train.numInstances());
	743	result[current++] = new Double(eval.numInstances());
	744	result[current++] = new Double(eval.correct());
	745	result[current++] = new Double(eval.incorrect());
	746	result[current++] = new Double(eval.unclassified());
	747	result[current++] = new Double(eval.pctCorrect());
	748	result[current++] = new Double(eval.pctIncorrect());
	749	result[current++] = new Double(eval.pctUnclassified());
	750	result[current++] = new Double(eval.kappa());
	751
	752	result[current++] = new Double(eval.meanAbsoluteError());
	753	result[current++] = new Double(eval.rootMeanSquaredError());
	754	result[current++] = new Double(eval.relativeAbsoluteError());
	755	result[current++] = new Double(eval.rootRelativeSquaredError());
	756
	757	result[current++] = new Double(eval.SFPriorEntropy());
	758	result[current++] = new Double(eval.SFSchemeEntropy());
	759	result[current++] = new Double(eval.SFEntropyGain());
	760	result[current++] = new Double(eval.SFMeanPriorEntropy());
	761	result[current++] = new Double(eval.SFMeanSchemeEntropy());
	762	result[current++] = new Double(eval.SFMeanEntropyGain());
	763
	764	// K&B stats
	765	result[current++] = new Double(eval.KBInformation());
	766	result[current++] = new Double(eval.KBMeanInformation());
	767	result[current++] = new Double(eval.KBRelativeInformation());
	768
	769	// IR stats
	770	result[current++] = new Double(eval.truePositiveRate(m_IRclass));
	771	result[current++] = new Double(eval.numTruePositives(m_IRclass));
	772	result[current++] = new Double(eval.falsePositiveRate(m_IRclass));
	773	result[current++] = new Double(eval.numFalsePositives(m_IRclass));
	774	result[current++] = new Double(eval.trueNegativeRate(m_IRclass));
	775	result[current++] = new Double(eval.numTrueNegatives(m_IRclass));
	776	result[current++] = new Double(eval.falseNegativeRate(m_IRclass));
	777	result[current++] = new Double(eval.numFalseNegatives(m_IRclass));
	778	result[current++] = new Double(eval.precision(m_IRclass));
	779	result[current++] = new Double(eval.recall(m_IRclass));
	780	result[current++] = new Double(eval.fMeasure(m_IRclass));
	781	result[current++] = new Double(eval.areaUnderROC(m_IRclass));
	782
	783	// Weighted IR stats
	784	result[current++] = new Double(eval.weightedTruePositiveRate());
	785	result[current++] = new Double(eval.weightedFalsePositiveRate());
	786	result[current++] = new Double(eval.weightedTrueNegativeRate());
	787	result[current++] = new Double(eval.weightedFalseNegativeRate());
	788	result[current++] = new Double(eval.weightedPrecision());
	789	result[current++] = new Double(eval.weightedRecall());
	790	result[current++] = new Double(eval.weightedFMeasure());
	791	result[current++] = new Double(eval.weightedAreaUnderROC());
	792
	793	// Unweighted IR stats
	794	result[current++] = new Double(eval.unweightedMacroFmeasure());
	795	result[current++] = new Double(eval.unweightedMicroFmeasure());
	796
	797	// Timing stats
	798	result[current++] = new Double(trainTimeElapsed / 1000.0);
	799	result[current++] = new Double(testTimeElapsed / 1000.0);
	800	if(canMeasureCPUTime) {
	801	result[current++] = new Double((trainCPUTimeElapsed/1000000.0) / 1000.0);
	802	result[current++] = new Double((testCPUTimeElapsed /1000000.0) / 1000.0);
	803	}
	804	else {
	805	result[current++] = new Double(Utils.missingValue());
	806	result[current++] = new Double(Utils.missingValue());
	807	}
	808
	809	// sizes
	810	ByteArrayOutputStream bastream = new ByteArrayOutputStream();
	811	ObjectOutputStream oostream = new ObjectOutputStream(bastream);
	812	oostream.writeObject(m_Classifier);
	813	result[current++] = new Double(bastream.size());
	814	bastream = new ByteArrayOutputStream();
	815	oostream = new ObjectOutputStream(bastream);
	816	oostream.writeObject(train);
	817	result[current++] = new Double(bastream.size());
	818	bastream = new ByteArrayOutputStream();
	819	oostream = new ObjectOutputStream(bastream);
	820	oostream.writeObject(test);
	821	result[current++] = new Double(bastream.size());
	822
	823	// Prediction interval statistics
	824	result[current++] = new Double(eval.coverageOfTestCasesByPredictedRegions());
	825	result[current++] = new Double(eval.sizeOfPredictedRegions());
	826
	827	// IDs
	828	if (getAttributeID() >= 0){
	829	String idsString = "";
	830	if (test.attribute(m_attID).isNumeric()){
	831	if (test.numInstances() > 0)
	832	idsString += test.instance(0).value(m_attID);
	833	for(int i=1;i<test.numInstances();i++){
	834	idsString += "\|" + test.instance(i).value(m_attID);
	835	}
	836	} else {
	837	if (test.numInstances() > 0)
	838	idsString += test.instance(0).stringValue(m_attID);
	839	for(int i=1;i<test.numInstances();i++){
	840	idsString += "\|" + test.instance(i).stringValue(m_attID);
	841	}
	842	}
	843	result[current++] = idsString;
	844	}
	845
	846	if (getPredTargetColumn()){
	847	if (test.classAttribute().isNumeric()){
	848	// Targets
	849	if (test.numInstances() > 0){
	850	String targetsString = "";
	851	targetsString += test.instance(0).value(test.classIndex());
	852	for(int i=1;i<test.numInstances();i++){
	853	targetsString += "\|" + test.instance(i).value(test.classIndex());
	854	}
	855	result[current++] = targetsString;
	856	}
	857
	858	// Predictions
	859	if (predictions.length > 0){
	860	String predictionsString = "";
	861	predictionsString += predictions[0];
	862	for(int i=1;i<predictions.length;i++){
	863	predictionsString += "\|" + predictions[i];
	864	}
	865	result[current++] = predictionsString;
	866	}
	867	} else {
	868	// Targets
	869	if (test.numInstances() > 0){
	870	String targetsString = "";
	871	targetsString += test.instance(0).stringValue(test.classIndex());
	872	for(int i=1;i<test.numInstances();i++){
	873	targetsString += "\|" + test.instance(i).stringValue(test.classIndex());
	874	}
	875	result[current++] = targetsString;
	876	}
	877
	878	// Predictions
	879	if (predictions.length > 0){
	880	String predictionsString = "";
	881	predictionsString += test.classAttribute().value((int) predictions[0]);
	882	for(int i=1;i<predictions.length;i++){
	883	predictionsString += "\|" + test.classAttribute().value((int) predictions[i]);
	884	}
	885	result[current++] = predictionsString;
	886	}
	887	}
	888	}
	889
	890	if (m_Classifier instanceof Summarizable) {
	891	result[current++] = ((Summarizable)m_Classifier).toSummaryString();
	892	} else {
	893	result[current++] = null;
	894	}
	895
	896	for (int i=0;i<addm;i++) {
	897	if (m_doesProduce[i]) {
	898	try {
	899	double dv = ((AdditionalMeasureProducer)m_Classifier).
	900	getMeasure(m_AdditionalMeasures[i]);
	901	if (!Utils.isMissingValue(dv)) {
	902	Double value = new Double(dv);
	903	result[current++] = value;
	904	} else {
	905	result[current++] = null;
	906	}
	907	} catch (Exception ex) {
	908	System.err.println(ex);
	909	}
	910	} else {
	911	result[current++] = null;
	912	}
	913	}
	914
	915	if (current != overall_length) {
	916	throw new Error("Results didn't fit RESULT_SIZE");
	917	}
	918	return result;
	919	}
	920
	921	/**
	922	* Returns the tip text for this property
	923	* @return tip text for this property suitable for
	924	* displaying in the explorer/experimenter gui
	925	*/
	926	public String classifierTipText() {
	927	return "The classifier to use.";
	928	}
	929
	930	/**
	931	* Get the value of Classifier.
	932	*
	933	* @return Value of Classifier.
	934	*/
	935	public Classifier getClassifier() {
	936
	937	return m_Template;
	938	}
	939
	940	/**
	941	* Sets the classifier.
	942	*
	943	* @param newClassifier the new classifier to use.
	944	*/
	945	public void setClassifier(Classifier newClassifier) {
	946
	947	m_Template = newClassifier;
	948	updateOptions();
	949	}
	950
	951	/**
	952	* Get the value of ClassForIRStatistics.
	953	* @return Value of ClassForIRStatistics.
	954	*/
	955	public int getClassForIRStatistics() {
	956	return m_IRclass;
	957	}
	958
	959	/**
	960	* Set the value of ClassForIRStatistics.
	961	* @param v Value to assign to ClassForIRStatistics.
	962	*/
	963	public void setClassForIRStatistics(int v) {
	964	m_IRclass = v;
	965	}
	966
	967	/**
	968	* Get the index of Attibute Identifying the instances
	969	* @return index of outputed Attribute.
	970	*/
	971	public int getAttributeID() {
	972	return m_attID;
	973	}
	974
	975	/**
	976	* Set the index of Attibute Identifying the instances
	977	* @param v index the attribute to output
	978	*/
	979	public void setAttributeID(int v) {
	980	m_attID = v;
	981	}
	982
	983	/**
	984	*@return true if the prediction and target columns must be outputed.
	985	*/
	986	public boolean getPredTargetColumn(){
	987	return m_predTargetColumn;
	988	}
	989
	990	/**
	991	* Set the flag for prediction and target output.
	992	*@param v true if the 2 columns have to be outputed. false otherwise.
	993	*/
	994	public void setPredTargetColumn(boolean v){
	995	m_predTargetColumn = v;
	996	}
	997
	998	/**
	999	* Updates the options that the current classifier is using.
	1000	*/
	1001	protected void updateOptions() {
	1002
	1003	if (m_Template instanceof OptionHandler) {
	1004	m_ClassifierOptions = Utils.joinOptions(((OptionHandler)m_Template)
	1005	.getOptions());
	1006	} else {
	1007	m_ClassifierOptions = "";
	1008	}
	1009	if (m_Template instanceof Serializable) {
	1010	ObjectStreamClass obs = ObjectStreamClass.lookup(m_Template
	1011	.getClass());
	1012	m_ClassifierVersion = "" + obs.getSerialVersionUID();
	1013	} else {
	1014	m_ClassifierVersion = "";
	1015	}
	1016	}
	1017
	1018	/**
	1019	* Set the Classifier to use, given it's class name. A new classifier will be
	1020	* instantiated.
	1021	*
	1022	* @param newClassifierName the Classifier class name.
	1023	* @throws Exception if the class name is invalid.
	1024	*/
	1025	public void setClassifierName(String newClassifierName) throws Exception {
	1026
	1027	try {
	1028	setClassifier((Classifier)Class.forName(newClassifierName)
	1029	.newInstance());
	1030	} catch (Exception ex) {
	1031	throw new Exception("Can't find Classifier with class name: "
	1032	+ newClassifierName);
	1033	}
	1034	}
	1035
	1036	/**
	1037	* Gets the raw output from the classifier
	1038	* @return the raw output from th,0e classifier
	1039	*/
	1040	public String getRawResultOutput() {
	1041	StringBuffer result = new StringBuffer();
	1042
	1043	if (m_Classifier == null) {
	1044	return "<null> classifier";
	1045	}
	1046	result.append(toString());
	1047	result.append("Classifier model: \n"+m_Classifier.toString()+'\n');
	1048
	1049	// append the performance statistics
	1050	if (m_result != null) {
	1051	result.append(m_result);
	1052
	1053	if (m_doesProduce != null) {
	1054	for (int i=0;i<m_doesProduce.length;i++) {
	1055	if (m_doesProduce[i]) {
	1056	try {
	1057	double dv = ((AdditionalMeasureProducer)m_Classifier).
	1058	getMeasure(m_AdditionalMeasures[i]);
	1059	if (!Utils.isMissingValue(dv)) {
	1060	Double value = new Double(dv);
	1061	result.append(m_AdditionalMeasures[i]+" : "+value+'\n');
	1062	} else {
	1063	result.append(m_AdditionalMeasures[i]+" : "+'?'+'\n');
	1064	}
	1065	} catch (Exception ex) {
	1066	System.err.println(ex);
	1067	}
	1068	}
	1069	}
	1070	}
	1071	}
	1072	return result.toString();
	1073	}
	1074
	1075	/**
	1076	* Returns a text description of the split evaluator.
	1077	*
	1078	* @return a text description of the split evaluator.
	1079	*/
	1080	public String toString() {
	1081
	1082	String result = "ClassifierSplitEvaluator: ";
	1083	if (m_Template == null) {
	1084	return result + "<null> classifier";
	1085	}
	1086	return result + m_Template.getClass().getName() + " "
	1087	+ m_ClassifierOptions + "(version " + m_ClassifierVersion + ")";
	1088	}
	1089
	1090	/**
	1091	* Returns the revision string.
	1092	*
	1093	* @return the revision
	1094	*/
	1095	public String getRevision() {
	1096	return RevisionUtils.extract("$Revision: 5987 $");
	1097	}
	1098	} // ClassifierSplitEvaluator

Note: See TracBrowser for help on using the repository browser.

Download in other formats: