Context Navigation

source: src/main/java/weka/experiment/ClassifierSplitEvaluator.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 35.7 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* ClassifierSplitEvaluator.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23
24	package weka.experiment;
25
26	import weka.classifiers.Classifier;
27	import weka.classifiers.AbstractClassifier;
28	import weka.classifiers.Evaluation;
29	import weka.classifiers.rules.ZeroR;
30	import weka.core.AdditionalMeasureProducer;
31	import weka.core.Attribute;
32	import weka.core.Instance;
33	import weka.core.Instances;
34	import weka.core.Option;
35	import weka.core.OptionHandler;
36	import weka.core.RevisionHandler;
37	import weka.core.RevisionUtils;
38	import weka.core.Summarizable;
39	import weka.core.Utils;
40
41	import java.io.ByteArrayOutputStream;
42	import java.io.ObjectOutputStream;
43	import java.io.ObjectStreamClass;
44	import java.io.Serializable;
45	import java.lang.management.ManagementFactory;
46	import java.lang.management.ThreadMXBean;
47	import java.util.Enumeration;
48	import java.util.Vector;
49
50
51	/**
52	<!-- globalinfo-start -->
53	* A SplitEvaluator that produces results for a classification scheme on a nominal class attribute.
54	* <p/>
55	<!-- globalinfo-end -->
56	*
57	<!-- options-start -->
58	* Valid options are: <p/>
59	*
60	* <pre> -W <class name>
61	* The full class name of the classifier.
62	* eg: weka.classifiers.bayes.NaiveBayes</pre>
63	*
64	* <pre> -C <index>
65	* The index of the class for which IR statistics
66	* are to be output. (default 1)</pre>
67	*
68	* <pre> -I <index>
69	* The index of an attribute to output in the
70	* results. This attribute should identify an
71	* instance in order to know which instances are
72	* in the test set of a cross validation. if 0
73	* no output (default 0).</pre>
74	*
75	* <pre> -P
76	* Add target and prediction columns to the result
77	* for each fold.</pre>
78	*
79	* <pre>
80	* Options specific to classifier weka.classifiers.rules.ZeroR:
81	* </pre>
82	*
83	* <pre> -D
84	* If set, classifier is run in debug mode and
85	* may output additional info to the console</pre>
86	*
87	<!-- options-end -->
88	*
89	* All options after -- will be passed to the classifier.
90	*
91	* @author Len Trigg (trigg@cs.waikato.ac.nz)
92	* @version $Revision: 5987 $
93	*/
94	public class ClassifierSplitEvaluator
95	implements SplitEvaluator, OptionHandler, AdditionalMeasureProducer,
96	RevisionHandler {
97
98	/** for serialization */
99	static final long serialVersionUID = -8511241602760467265L;
100
101	/** The template classifier */
102	protected Classifier m_Template = new ZeroR();
103
104	/** The classifier used for evaluation */
105	protected Classifier m_Classifier;
106
107	/** The names of any additional measures to look for in SplitEvaluators */
108	protected String [] m_AdditionalMeasures = null;
109
110	/** Array of booleans corresponding to the measures in m_AdditionalMeasures
111	indicating which of the AdditionalMeasures the current classifier
112	can produce */
113	protected boolean [] m_doesProduce = null;
114
115	/** The number of additional measures that need to be filled in
116	after taking into account column constraints imposed by the final
117	destination for results */
118	protected int m_numberAdditionalMeasures = 0;
119
120	/** Holds the statistics for the most recent application of the classifier */
121	protected String m_result = null;
122
123	/** The classifier options (if any) */
124	protected String m_ClassifierOptions = "";
125
126	/** The classifier version */
127	protected String m_ClassifierVersion = "";
128
129	/** The length of a key */
130	private static final int KEY_SIZE = 3;
131
132	/** The length of a result */
133	private static final int RESULT_SIZE = 30;
134
135	/** The number of IR statistics */
136	private static final int NUM_IR_STATISTICS = 14;
137
138	/** The number of averaged IR statistics */
139	private static final int NUM_WEIGHTED_IR_STATISTICS = 8;
140
141	/** The number of unweighted averaged IR statistics */
142	private static final int NUM_UNWEIGHTED_IR_STATISTICS = 2;
143
144	/** Class index for information retrieval statistics (default 0) */
145	private int m_IRclass = 0;
146
147	/** Flag for prediction and target columns output.*/
148	private boolean m_predTargetColumn = false;
149
150	/** Attribute index of instance identifier (default -1) */
151	private int m_attID = -1;
152
/**
 * Creates an evaluator around the default template classifier and
 * refreshes the option information derived from it.
 */
public ClassifierSplitEvaluator() {
  this.updateOptions();
}
160
161	/**
162	* Returns a string describing this split evaluator
163	* @return a description of the split evaluator suitable for
164	* displaying in the explorer/experimenter gui
165	*/
166	public String globalInfo() {
167	return " A SplitEvaluator that produces results for a classification "
168	+"scheme on a nominal class attribute.";
169	}
170
171	/**
172	* Returns an enumeration describing the available options..
173	*
174	* @return an enumeration of all the available options.
175	*/
176	public Enumeration listOptions() {
177
178	Vector newVector = new Vector(4);
179
180	newVector.addElement(new Option(
181	"\tThe full class name of the classifier.\n"
182	+"\teg: weka.classifiers.bayes.NaiveBayes",
183	"W", 1,
184	"-W <class name>"));
185	newVector.addElement(new Option(
186	"\tThe index of the class for which IR statistics\n" +
187	"\tare to be output. (default 1)",
188	"C", 1,
189	"-C <index>"));
190	newVector.addElement(new Option(
191	"\tThe index of an attribute to output in the\n" +
192	"\tresults. This attribute should identify an\n" +
193	"\tinstance in order to know which instances are\n" +
194	"\tin the test set of a cross validation. if 0\n" +
195	"\tno output (default 0).",
196	"I", 1,
197	"-I <index>"));
198	newVector.addElement(new Option(
199	"\tAdd target and prediction columns to the result\n" +
200	"\tfor each fold.",
201	"P", 0,
202	"-P"));
203
204	if ((m_Template != null) &&
205	(m_Template instanceof OptionHandler)) {
206	newVector.addElement(new Option(
207	"",
208	"", 0, "\nOptions specific to classifier "
209	+ m_Template.getClass().getName() + ":"));
210	Enumeration enu = ((OptionHandler)m_Template).listOptions();
211	while (enu.hasMoreElements()) {
212	newVector.addElement(enu.nextElement());
213	}
214	}
215	return newVector.elements();
216	}
217
218	/**
219	* Parses a given list of options. <p/>
220	*
221	<!-- options-start -->
222	* Valid options are: <p/>
223	*
224	* <pre> -W <class name>
225	* The full class name of the classifier.
226	* eg: weka.classifiers.bayes.NaiveBayes</pre>
227	*
228	* <pre> -C <index>
229	* The index of the class for which IR statistics
230	* are to be output. (default 1)</pre>
231	*
232	* <pre> -I <index>
233	* The index of an attribute to output in the
234	* results. This attribute should identify an
235	* instance in order to know which instances are
236	* in the test set of a cross validation. if 0
237	* no output (default 0).</pre>
238	*
239	* <pre> -P
240	* Add target and prediction columns to the result
241	* for each fold.</pre>
242	*
243	* <pre>
244	* Options specific to classifier weka.classifiers.rules.ZeroR:
245	* </pre>
246	*
247	* <pre> -D
248	* If set, classifier is run in debug mode and
249	* may output additional info to the console</pre>
250	*
251	<!-- options-end -->
252	*
253	* All options after -- will be passed to the classifier.
254	*
255	* @param options the list of options as an array of strings
256	* @throws Exception if an option is not supported
257	*/
258	public void setOptions(String[] options) throws Exception {
259
260	String cName = Utils.getOption('W', options);
261	if (cName.length() == 0) {
262	throw new Exception("A classifier must be specified with"
263	+ " the -W option.");
264	}
265	// Do it first without options, so if an exception is thrown during
266	// the option setting, listOptions will contain options for the actual
267	// Classifier.
268	setClassifier(AbstractClassifier.forName(cName, null));
269	if (getClassifier() instanceof OptionHandler) {
270	((OptionHandler) getClassifier())
271	.setOptions(Utils.partitionOptions(options));
272	updateOptions();
273	}
274
275	String indexName = Utils.getOption('C', options);
276	if (indexName.length() != 0) {
277	m_IRclass = (new Integer(indexName)).intValue() - 1;
278	} else {
279	m_IRclass = 0;
280	}
281
282	String attID = Utils.getOption('I', options);
283	if (attID.length() != 0) {
284	m_attID = (new Integer(attID)).intValue() - 1;
285	} else {
286	m_attID = -1;
287	}
288
289	m_predTargetColumn = Utils.getFlag('P', options);
290	}
291
292	/**
293	* Gets the current settings of the Classifier.
294	*
295	* @return an array of strings suitable for passing to setOptions
296	*/
297	public String [] getOptions() {
298
299	String [] classifierOptions = new String [0];
300	if ((m_Template != null) &&
301	(m_Template instanceof OptionHandler)) {
302	classifierOptions = ((OptionHandler)m_Template).getOptions();
303	}
304
305	String [] options = new String [classifierOptions.length + 8];
306	int current = 0;
307
308	if (getClassifier() != null) {
309	options[current++] = "-W";
310	options[current++] = getClassifier().getClass().getName();
311	}
312	options[current++] = "-I";
313	options[current++] = "" + (m_attID + 1);
314
315	if (getPredTargetColumn()) options[current++] = "-P";
316
317	options[current++] = "-C";
318	options[current++] = "" + (m_IRclass + 1);
319	options[current++] = "--";
320
321	System.arraycopy(classifierOptions, 0, options, current,
322	classifierOptions.length);
323	current += classifierOptions.length;
324	while (current < options.length) {
325	options[current++] = "";
326	}
327	return options;
328	}
329
330	/**
331	* Set a list of method names for additional measures to look for
332	* in Classifiers. This could contain many measures (of which only a
333	* subset may be produceable by the current Classifier) if an experiment
334	* is the type that iterates over a set of properties.
335	* @param additionalMeasures a list of method names
336	*/
337	public void setAdditionalMeasures(String [] additionalMeasures) {
338	// System.err.println("ClassifierSplitEvaluator: setting additional measures");
339	m_AdditionalMeasures = additionalMeasures;
340
341	// determine which (if any) of the additional measures this classifier
342	// can produce
343	if (m_AdditionalMeasures != null && m_AdditionalMeasures.length > 0) {
344	m_doesProduce = new boolean [m_AdditionalMeasures.length];
345
346	if (m_Template instanceof AdditionalMeasureProducer) {
347	Enumeration en = ((AdditionalMeasureProducer)m_Template).
348	enumerateMeasures();
349	while (en.hasMoreElements()) {
350	String mname = (String)en.nextElement();
351	for (int j=0;j<m_AdditionalMeasures.length;j++) {
352	if (mname.compareToIgnoreCase(m_AdditionalMeasures[j]) == 0) {
353	m_doesProduce[j] = true;
354	}
355	}
356	}
357	}
358	} else {
359	m_doesProduce = null;
360	}
361	}
362
363	/**
364	* Returns an enumeration of any additional measure names that might be
365	* in the classifier
366	* @return an enumeration of the measure names
367	*/
368	public Enumeration enumerateMeasures() {
369	Vector newVector = new Vector();
370	if (m_Template instanceof AdditionalMeasureProducer) {
371	Enumeration en = ((AdditionalMeasureProducer)m_Template).
372	enumerateMeasures();
373	while (en.hasMoreElements()) {
374	String mname = (String)en.nextElement();
375	newVector.addElement(mname);
376	}
377	}
378	return newVector.elements();
379	}
380
381	/**
382	* Returns the value of the named measure
383	* @param additionalMeasureName the name of the measure to query for its value
384	* @return the value of the named measure
385	* @throws IllegalArgumentException if the named measure is not supported
386	*/
387	public double getMeasure(String additionalMeasureName) {
388	if (m_Template instanceof AdditionalMeasureProducer) {
389	if (m_Classifier == null) {
390	throw new IllegalArgumentException("ClassifierSplitEvaluator: " +
391	"Can't return result for measure, " +
392	"classifier has not been built yet.");
393	}
394	return ((AdditionalMeasureProducer)m_Classifier).
395	getMeasure(additionalMeasureName);
396	} else {
397	throw new IllegalArgumentException("ClassifierSplitEvaluator: "
398	+"Can't return value for : "+additionalMeasureName
399	+". "+m_Template.getClass().getName()+" "
400	+"is not an AdditionalMeasureProducer");
401	}
402	}
403
404	/**
405	* Gets the data types of each of the key columns produced for a single run.
406	* The number of key fields must be constant
407	* for a given SplitEvaluator.
408	*
409	* @return an array containing objects of the type of each key column. The
410	* objects should be Strings, or Doubles.
411	*/
412	public Object [] getKeyTypes() {
413
414	Object [] keyTypes = new Object[KEY_SIZE];
415	keyTypes[0] = "";
416	keyTypes[1] = "";
417	keyTypes[2] = "";
418	return keyTypes;
419	}
420
421	/**
422	* Gets the names of each of the key columns produced for a single run.
423	* The number of key fields must be constant
424	* for a given SplitEvaluator.
425	*
426	* @return an array containing the name of each key column
427	*/
428	public String [] getKeyNames() {
429
430	String [] keyNames = new String[KEY_SIZE];
431	keyNames[0] = "Scheme";
432	keyNames[1] = "Scheme_options";
433	keyNames[2] = "Scheme_version_ID";
434	return keyNames;
435	}
436
437	/**
438	* Gets the key describing the current SplitEvaluator. For example
439	* This may contain the name of the classifier used for classifier
440	* predictive evaluation. The number of key fields must be constant
441	* for a given SplitEvaluator.
442	*
443	* @return an array of objects containing the key.
444	*/
445	public Object [] getKey(){
446
447	Object [] key = new Object[KEY_SIZE];
448	key[0] = m_Template.getClass().getName();
449	key[1] = m_ClassifierOptions;
450	key[2] = m_ClassifierVersion;
451	return key;
452	}
453
454	/**
455	* Gets the data types of each of the result columns produced for a
456	* single run. The number of result fields must be constant
457	* for a given SplitEvaluator.
458	*
459	* @return an array containing objects of the type of each result column.
460	* The objects should be Strings, or Doubles.
461	*/
462	public Object [] getResultTypes() {
463	int addm = (m_AdditionalMeasures != null)
464	? m_AdditionalMeasures.length
465	: 0;
466	int overall_length = RESULT_SIZE+addm;
467	overall_length += NUM_IR_STATISTICS;
468	overall_length += NUM_WEIGHTED_IR_STATISTICS;
469	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
470	if (getAttributeID() >= 0) overall_length += 1;
471	if (getPredTargetColumn()) overall_length += 2;
472	Object [] resultTypes = new Object[overall_length];
473	Double doub = new Double(0);
474	int current = 0;
475	resultTypes[current++] = doub;
476	resultTypes[current++] = doub;
477
478	resultTypes[current++] = doub;
479	resultTypes[current++] = doub;
480	resultTypes[current++] = doub;
481	resultTypes[current++] = doub;
482	resultTypes[current++] = doub;
483	resultTypes[current++] = doub;
484
485	resultTypes[current++] = doub;
486	resultTypes[current++] = doub;
487	resultTypes[current++] = doub;
488	resultTypes[current++] = doub;
489
490	resultTypes[current++] = doub;
491	resultTypes[current++] = doub;
492	resultTypes[current++] = doub;
493	resultTypes[current++] = doub;
494	resultTypes[current++] = doub;
495	resultTypes[current++] = doub;
496
497	resultTypes[current++] = doub;
498	resultTypes[current++] = doub;
499	resultTypes[current++] = doub;
500	resultTypes[current++] = doub;
501
502	// IR stats
503	resultTypes[current++] = doub;
504	resultTypes[current++] = doub;
505	resultTypes[current++] = doub;
506	resultTypes[current++] = doub;
507	resultTypes[current++] = doub;
508	resultTypes[current++] = doub;
509	resultTypes[current++] = doub;
510	resultTypes[current++] = doub;
511	resultTypes[current++] = doub;
512	resultTypes[current++] = doub;
513	resultTypes[current++] = doub;
514	resultTypes[current++] = doub;
515
516	// Unweighted IR stats
517	resultTypes[current++] = doub;
518	resultTypes[current++] = doub;
519
520	// Weighted IR stats
521	resultTypes[current++] = doub;
522	resultTypes[current++] = doub;
523	resultTypes[current++] = doub;
524	resultTypes[current++] = doub;
525	resultTypes[current++] = doub;
526	resultTypes[current++] = doub;
527	resultTypes[current++] = doub;
528	resultTypes[current++] = doub;
529
530	// Timing stats
531	resultTypes[current++] = doub;
532	resultTypes[current++] = doub;
533	resultTypes[current++] = doub;
534	resultTypes[current++] = doub;
535
536	// sizes
537	resultTypes[current++] = doub;
538	resultTypes[current++] = doub;
539	resultTypes[current++] = doub;
540
541	// Prediction interval statistics
542	resultTypes[current++] = doub;
543	resultTypes[current++] = doub;
544
545	// ID/Targets/Predictions
546	if (getAttributeID() >= 0) resultTypes[current++] = "";
547	if (getPredTargetColumn()){
548	resultTypes[current++] = "";
549	resultTypes[current++] = "";
550	}
551
552	// Classifier defined extras
553	resultTypes[current++] = "";
554
555	// add any additional measures
556	for (int i=0;i<addm;i++) {
557	resultTypes[current++] = doub;
558	}
559	if (current != overall_length) {
560	throw new Error("ResultTypes didn't fit RESULT_SIZE");
561	}
562	return resultTypes;
563	}
564
565	/**
566	* Gets the names of each of the result columns produced for a single run.
567	* The number of result fields must be constant
568	* for a given SplitEvaluator.
569	*
570	* @return an array containing the name of each result column
571	*/
572	public String [] getResultNames() {
573	int addm = (m_AdditionalMeasures != null)
574	? m_AdditionalMeasures.length
575	: 0;
576	int overall_length = RESULT_SIZE+addm;
577	overall_length += NUM_IR_STATISTICS;
578	overall_length += NUM_WEIGHTED_IR_STATISTICS;
579	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
580	if (getAttributeID() >= 0) overall_length += 1;
581	if (getPredTargetColumn()) overall_length += 2;
582
583	String [] resultNames = new String[overall_length];
584	int current = 0;
585	resultNames[current++] = "Number_of_training_instances";
586	resultNames[current++] = "Number_of_testing_instances";
587
588	// Basic performance stats - right vs wrong
589	resultNames[current++] = "Number_correct";
590	resultNames[current++] = "Number_incorrect";
591	resultNames[current++] = "Number_unclassified";
592	resultNames[current++] = "Percent_correct";
593	resultNames[current++] = "Percent_incorrect";
594	resultNames[current++] = "Percent_unclassified";
595	resultNames[current++] = "Kappa_statistic";
596
597	// Sensitive stats - certainty of predictions
598	resultNames[current++] = "Mean_absolute_error";
599	resultNames[current++] = "Root_mean_squared_error";
600	resultNames[current++] = "Relative_absolute_error";
601	resultNames[current++] = "Root_relative_squared_error";
602
603	// SF stats
604	resultNames[current++] = "SF_prior_entropy";
605	resultNames[current++] = "SF_scheme_entropy";
606	resultNames[current++] = "SF_entropy_gain";
607	resultNames[current++] = "SF_mean_prior_entropy";
608	resultNames[current++] = "SF_mean_scheme_entropy";
609	resultNames[current++] = "SF_mean_entropy_gain";
610
611	// K&B stats
612	resultNames[current++] = "KB_information";
613	resultNames[current++] = "KB_mean_information";
614	resultNames[current++] = "KB_relative_information";
615
616	// IR stats
617	resultNames[current++] = "True_positive_rate";
618	resultNames[current++] = "Num_true_positives";
619	resultNames[current++] = "False_positive_rate";
620	resultNames[current++] = "Num_false_positives";
621	resultNames[current++] = "True_negative_rate";
622	resultNames[current++] = "Num_true_negatives";
623	resultNames[current++] = "False_negative_rate";
624	resultNames[current++] = "Num_false_negatives";
625	resultNames[current++] = "IR_precision";
626	resultNames[current++] = "IR_recall";
627	resultNames[current++] = "F_measure";
628	resultNames[current++] = "Area_under_ROC";
629
630	// Weighted IR stats
631	resultNames[current++] = "Weighted_avg_true_positive_rate";
632	resultNames[current++] = "Weighted_avg_false_positive_rate";
633	resultNames[current++] = "Weighted_avg_true_negative_rate";
634	resultNames[current++] = "Weighted_avg_false_negative_rate";
635	resultNames[current++] = "Weighted_avg_IR_precision";
636	resultNames[current++] = "Weighted_avg_IR_recall";
637	resultNames[current++] = "Weighted_avg_F_measure";
638	resultNames[current++] = "Weighted_avg_area_under_ROC";
639
640	// Unweighted IR stats
641	resultNames[current++] = "Unweighted_macro_avg_F_measure";
642	resultNames[current++] = "Unweighted_micro_avg_F_measure";
643
644	// Timing stats
645	resultNames[current++] = "Elapsed_Time_training";
646	resultNames[current++] = "Elapsed_Time_testing";
647	resultNames[current++] = "UserCPU_Time_training";
648	resultNames[current++] = "UserCPU_Time_testing";
649
650	// sizes
651	resultNames[current++] = "Serialized_Model_Size";
652	resultNames[current++] = "Serialized_Train_Set_Size";
653	resultNames[current++] = "Serialized_Test_Set_Size";
654
655	// Prediction interval statistics
656	resultNames[current++] = "Coverage_of_Test_Cases_By_Regions";
657	resultNames[current++] = "Size_of_Predicted_Regions";
658
659	// ID/Targets/Predictions
660	if (getAttributeID() >= 0) resultNames[current++] = "Instance_ID";
661	if (getPredTargetColumn()){
662	resultNames[current++] = "Targets";
663	resultNames[current++] = "Predictions";
664	}
665
666	// Classifier defined extras
667	resultNames[current++] = "Summary";
668	// add any additional measures
669	for (int i=0;i<addm;i++) {
670	resultNames[current++] = m_AdditionalMeasures[i];
671	}
672	if (current != overall_length) {
673	throw new Error("ResultNames didn't fit RESULT_SIZE");
674	}
675	return resultNames;
676	}
677
678	/**
679	* Gets the results for the supplied train and test datasets. Now performs
680	* a deep copy of the classifier before it is built and evaluated (just in case
681	* the classifier is not initialized properly in buildClassifier()).
682	*
683	* @param train the training Instances.
684	* @param test the testing Instances.
685	* @return the results stored in an array. The objects stored in
686	* the array may be Strings, Doubles, or null (for the missing value).
687	* @throws Exception if a problem occurs while getting the results
688	*/
689	public Object [] getResult(Instances train, Instances test)
690	throws Exception {
691
692	if (train.classAttribute().type() != Attribute.NOMINAL) {
693	throw new Exception("Class attribute is not nominal!");
694	}
695	if (m_Template == null) {
696	throw new Exception("No classifier has been specified");
697	}
698	int addm = (m_AdditionalMeasures != null) ? m_AdditionalMeasures.length : 0;
699	int overall_length = RESULT_SIZE+addm;
700	overall_length += NUM_IR_STATISTICS;
701	overall_length += NUM_WEIGHTED_IR_STATISTICS;
702	overall_length += NUM_UNWEIGHTED_IR_STATISTICS;
703	if (getAttributeID() >= 0) overall_length += 1;
704	if (getPredTargetColumn()) overall_length += 2;
705
706	ThreadMXBean thMonitor = ManagementFactory.getThreadMXBean();
707	boolean canMeasureCPUTime = thMonitor.isThreadCpuTimeSupported();
708	if(!thMonitor.isThreadCpuTimeEnabled())
709	thMonitor.setThreadCpuTimeEnabled(true);
710
711	Object [] result = new Object[overall_length];
712	Evaluation eval = new Evaluation(train);
713	m_Classifier = AbstractClassifier.makeCopy(m_Template);
714	double [] predictions;
715	long thID = Thread.currentThread().getId();
716	long CPUStartTime=-1, trainCPUTimeElapsed=-1, testCPUTimeElapsed=-1,
717	trainTimeStart, trainTimeElapsed, testTimeStart, testTimeElapsed;
718
719	//training classifier
720	trainTimeStart = System.currentTimeMillis();
721	if(canMeasureCPUTime)
722	CPUStartTime = thMonitor.getThreadUserTime(thID);
723	m_Classifier.buildClassifier(train);
724	if(canMeasureCPUTime)
725	trainCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime;
726	trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
727
728	//testing classifier
729	testTimeStart = System.currentTimeMillis();
730	if(canMeasureCPUTime)
731	CPUStartTime = thMonitor.getThreadUserTime(thID);
732	predictions = eval.evaluateModel(m_Classifier, test);
733	if(canMeasureCPUTime)
734	testCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime;
735	testTimeElapsed = System.currentTimeMillis() - testTimeStart;
736	thMonitor = null;
737
738	m_result = eval.toSummaryString();
739	// The results stored are all per instance -- can be multiplied by the
740	// number of instances to get absolute numbers
741	int current = 0;
742	result[current++] = new Double(train.numInstances());
743	result[current++] = new Double(eval.numInstances());
744	result[current++] = new Double(eval.correct());
745	result[current++] = new Double(eval.incorrect());
746	result[current++] = new Double(eval.unclassified());
747	result[current++] = new Double(eval.pctCorrect());
748	result[current++] = new Double(eval.pctIncorrect());
749	result[current++] = new Double(eval.pctUnclassified());
750	result[current++] = new Double(eval.kappa());
751
752	result[current++] = new Double(eval.meanAbsoluteError());
753	result[current++] = new Double(eval.rootMeanSquaredError());
754	result[current++] = new Double(eval.relativeAbsoluteError());
755	result[current++] = new Double(eval.rootRelativeSquaredError());
756
757	result[current++] = new Double(eval.SFPriorEntropy());
758	result[current++] = new Double(eval.SFSchemeEntropy());
759	result[current++] = new Double(eval.SFEntropyGain());
760	result[current++] = new Double(eval.SFMeanPriorEntropy());
761	result[current++] = new Double(eval.SFMeanSchemeEntropy());
762	result[current++] = new Double(eval.SFMeanEntropyGain());
763
764	// K&B stats
765	result[current++] = new Double(eval.KBInformation());
766	result[current++] = new Double(eval.KBMeanInformation());
767	result[current++] = new Double(eval.KBRelativeInformation());
768
769	// IR stats
770	result[current++] = new Double(eval.truePositiveRate(m_IRclass));
771	result[current++] = new Double(eval.numTruePositives(m_IRclass));
772	result[current++] = new Double(eval.falsePositiveRate(m_IRclass));
773	result[current++] = new Double(eval.numFalsePositives(m_IRclass));
774	result[current++] = new Double(eval.trueNegativeRate(m_IRclass));
775	result[current++] = new Double(eval.numTrueNegatives(m_IRclass));
776	result[current++] = new Double(eval.falseNegativeRate(m_IRclass));
777	result[current++] = new Double(eval.numFalseNegatives(m_IRclass));
778	result[current++] = new Double(eval.precision(m_IRclass));
779	result[current++] = new Double(eval.recall(m_IRclass));
780	result[current++] = new Double(eval.fMeasure(m_IRclass));
781	result[current++] = new Double(eval.areaUnderROC(m_IRclass));
782
783	// Weighted IR stats
784	result[current++] = new Double(eval.weightedTruePositiveRate());
785	result[current++] = new Double(eval.weightedFalsePositiveRate());
786	result[current++] = new Double(eval.weightedTrueNegativeRate());
787	result[current++] = new Double(eval.weightedFalseNegativeRate());
788	result[current++] = new Double(eval.weightedPrecision());
789	result[current++] = new Double(eval.weightedRecall());
790	result[current++] = new Double(eval.weightedFMeasure());
791	result[current++] = new Double(eval.weightedAreaUnderROC());
792
793	// Unweighted IR stats
794	result[current++] = new Double(eval.unweightedMacroFmeasure());
795	result[current++] = new Double(eval.unweightedMicroFmeasure());
796
797	// Timing stats
798	result[current++] = new Double(trainTimeElapsed / 1000.0);
799	result[current++] = new Double(testTimeElapsed / 1000.0);
800	if(canMeasureCPUTime) {
801	result[current++] = new Double((trainCPUTimeElapsed/1000000.0) / 1000.0);
802	result[current++] = new Double((testCPUTimeElapsed /1000000.0) / 1000.0);
803	}
804	else {
805	result[current++] = new Double(Utils.missingValue());
806	result[current++] = new Double(Utils.missingValue());
807	}
808
809	// sizes
810	ByteArrayOutputStream bastream = new ByteArrayOutputStream();
811	ObjectOutputStream oostream = new ObjectOutputStream(bastream);
812	oostream.writeObject(m_Classifier);
813	result[current++] = new Double(bastream.size());
814	bastream = new ByteArrayOutputStream();
815	oostream = new ObjectOutputStream(bastream);
816	oostream.writeObject(train);
817	result[current++] = new Double(bastream.size());
818	bastream = new ByteArrayOutputStream();
819	oostream = new ObjectOutputStream(bastream);
820	oostream.writeObject(test);
821	result[current++] = new Double(bastream.size());
822
823	// Prediction interval statistics
824	result[current++] = new Double(eval.coverageOfTestCasesByPredictedRegions());
825	result[current++] = new Double(eval.sizeOfPredictedRegions());
826
827	// IDs
828	if (getAttributeID() >= 0){
829	String idsString = "";
830	if (test.attribute(m_attID).isNumeric()){
831	if (test.numInstances() > 0)
832	idsString += test.instance(0).value(m_attID);
833	for(int i=1;i<test.numInstances();i++){
834	idsString += "\|" + test.instance(i).value(m_attID);
835	}
836	} else {
837	if (test.numInstances() > 0)
838	idsString += test.instance(0).stringValue(m_attID);
839	for(int i=1;i<test.numInstances();i++){
840	idsString += "\|" + test.instance(i).stringValue(m_attID);
841	}
842	}
843	result[current++] = idsString;
844	}
845
846	if (getPredTargetColumn()){
847	if (test.classAttribute().isNumeric()){
848	// Targets
849	if (test.numInstances() > 0){
850	String targetsString = "";
851	targetsString += test.instance(0).value(test.classIndex());
852	for(int i=1;i<test.numInstances();i++){
853	targetsString += "\|" + test.instance(i).value(test.classIndex());
854	}
855	result[current++] = targetsString;
856	}
857
858	// Predictions
859	if (predictions.length > 0){
860	String predictionsString = "";
861	predictionsString += predictions[0];
862	for(int i=1;i<predictions.length;i++){
863	predictionsString += "\|" + predictions[i];
864	}
865	result[current++] = predictionsString;
866	}
867	} else {
868	// Targets
869	if (test.numInstances() > 0){
870	String targetsString = "";
871	targetsString += test.instance(0).stringValue(test.classIndex());
872	for(int i=1;i<test.numInstances();i++){
873	targetsString += "\|" + test.instance(i).stringValue(test.classIndex());
874	}
875	result[current++] = targetsString;
876	}
877
878	// Predictions
879	if (predictions.length > 0){
880	String predictionsString = "";
881	predictionsString += test.classAttribute().value((int) predictions[0]);
882	for(int i=1;i<predictions.length;i++){
883	predictionsString += "\|" + test.classAttribute().value((int) predictions[i]);
884	}
885	result[current++] = predictionsString;
886	}
887	}
888	}
889
890	if (m_Classifier instanceof Summarizable) {
891	result[current++] = ((Summarizable)m_Classifier).toSummaryString();
892	} else {
893	result[current++] = null;
894	}
895
896	for (int i=0;i<addm;i++) {
897	if (m_doesProduce[i]) {
898	try {
899	double dv = ((AdditionalMeasureProducer)m_Classifier).
900	getMeasure(m_AdditionalMeasures[i]);
901	if (!Utils.isMissingValue(dv)) {
902	Double value = new Double(dv);
903	result[current++] = value;
904	} else {
905	result[current++] = null;
906	}
907	} catch (Exception ex) {
908	System.err.println(ex);
909	}
910	} else {
911	result[current++] = null;
912	}
913	}
914
915	if (current != overall_length) {
916	throw new Error("Results didn't fit RESULT_SIZE");
917	}
918	return result;
919	}
920
921	/**
922	* Returns the tip text for this property
923	* @return tip text for this property suitable for
924	* displaying in the explorer/experimenter gui
925	*/
926	public String classifierTipText() {
927	return "The classifier to use.";
928	}
929
930	/**
931	* Get the value of Classifier.
932	*
933	* @return Value of Classifier.
934	*/
935	public Classifier getClassifier() {
936
937	return m_Template;
938	}
939
940	/**
941	* Sets the classifier.
942	*
943	* @param newClassifier the new classifier to use.
944	*/
945	public void setClassifier(Classifier newClassifier) {
946
947	m_Template = newClassifier;
948	updateOptions();
949	}
950
951	/**
952	* Get the value of ClassForIRStatistics.
953	* @return Value of ClassForIRStatistics.
954	*/
955	public int getClassForIRStatistics() {
956	return m_IRclass;
957	}
958
959	/**
960	* Set the value of ClassForIRStatistics.
961	* @param v Value to assign to ClassForIRStatistics.
962	*/
963	public void setClassForIRStatistics(int v) {
964	m_IRclass = v;
965	}
966
967	/**
968	* Get the index of the attribute identifying the instances.
969	* @return index of the attribute that is output.
970	*/
971	public int getAttributeID() {
972	return m_attID;
973	}
974
975	/**
976	* Set the index of the attribute identifying the instances.
977	* @param v index of the attribute to output
978	*/
979	public void setAttributeID(int v) {
980	m_attID = v;
981	}
982
983	/**
984	* @return true if the prediction and target columns must be output.
985	*/
986	public boolean getPredTargetColumn(){
987	return m_predTargetColumn;
988	}
989
990	/**
991	* Set the flag for prediction and target output.
992	* @param v true if the two columns have to be output, false otherwise.
993	*/
994	public void setPredTargetColumn(boolean v){
995	m_predTargetColumn = v;
996	}
997
998	/**
999	* Updates the options that the current classifier is using.
1000	*/
1001	protected void updateOptions() {
1002
1003	if (m_Template instanceof OptionHandler) {
1004	m_ClassifierOptions = Utils.joinOptions(((OptionHandler)m_Template)
1005	.getOptions());
1006	} else {
1007	m_ClassifierOptions = "";
1008	}
1009	if (m_Template instanceof Serializable) {
1010	ObjectStreamClass obs = ObjectStreamClass.lookup(m_Template
1011	.getClass());
1012	m_ClassifierVersion = "" + obs.getSerialVersionUID();
1013	} else {
1014	m_ClassifierVersion = "";
1015	}
1016	}
1017
1018	/**
1019	* Set the Classifier to use, given its class name. A new classifier will be
1020	* instantiated.
1021	*
1022	* @param newClassifierName the Classifier class name.
1023	* @throws Exception if the class name is invalid.
1024	*/
1025	public void setClassifierName(String newClassifierName) throws Exception {
1026
1027	try {
1028	setClassifier((Classifier)Class.forName(newClassifierName)
1029	.newInstance());
1030	} catch (Exception ex) {
1031	throw new Exception("Can't find Classifier with class name: "
1032	+ newClassifierName);
1033	}
1034	}
1035
1036	/**
1037	* Gets the raw output from the classifier
1038	* @return the raw output from the classifier
1039	*/
1040	public String getRawResultOutput() {
1041	StringBuffer result = new StringBuffer();
1042
1043	if (m_Classifier == null) {
1044	return "<null> classifier";
1045	}
1046	result.append(toString());
1047	result.append("Classifier model: \n"+m_Classifier.toString()+'\n');
1048
1049	// append the performance statistics
1050	if (m_result != null) {
1051	result.append(m_result);
1052
1053	if (m_doesProduce != null) {
1054	for (int i=0;i<m_doesProduce.length;i++) {
1055	if (m_doesProduce[i]) {
1056	try {
1057	double dv = ((AdditionalMeasureProducer)m_Classifier).
1058	getMeasure(m_AdditionalMeasures[i]);
1059	if (!Utils.isMissingValue(dv)) {
1060	Double value = new Double(dv);
1061	result.append(m_AdditionalMeasures[i]+" : "+value+'\n');
1062	} else {
1063	result.append(m_AdditionalMeasures[i]+" : "+'?'+'\n');
1064	}
1065	} catch (Exception ex) {
1066	System.err.println(ex);
1067	}
1068	}
1069	}
1070	}
1071	}
1072	return result.toString();
1073	}
1074
1075	/**
1076	* Returns a text description of the split evaluator.
1077	*
1078	* @return a text description of the split evaluator.
1079	*/
1080	public String toString() {
1081
1082	String result = "ClassifierSplitEvaluator: ";
1083	if (m_Template == null) {
1084	return result + "<null> classifier";
1085	}
1086	return result + m_Template.getClass().getName() + " "
1087	+ m_ClassifierOptions + "(version " + m_ClassifierVersion + ")";
1088	}
1089
1090	/**
1091	* Returns the revision string.
1092	*
1093	* @return the revision
1094	*/
1095	public String getRevision() {
1096	return RevisionUtils.extract("$Revision: 5987 $");
1097	}
1098	} // ClassifierSplitEvaluator

Note: See TracBrowser for help on using the repository browser.

Download in other formats: