Context Navigation

source: src/main/java/weka/attributeSelection/WrapperSubsetEval.java @ 15

Last change on this file since 15 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 23.8 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* WrapperSubsetEval.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.attributeSelection;
24
25	import weka.classifiers.Classifier;
26	import weka.classifiers.AbstractClassifier;
27	import weka.classifiers.Evaluation;
28	import weka.classifiers.rules.ZeroR;
29	import weka.core.Capabilities;
30	import weka.core.Instances;
31	import weka.core.Option;
32	import weka.core.OptionHandler;
33	import weka.core.RevisionUtils;
34	import weka.core.SelectedTag;
35	import weka.core.Tag;
36	import weka.core.TechnicalInformation;
37	import weka.core.TechnicalInformationHandler;
38	import weka.core.Utils;
39	import weka.core.Capabilities.Capability;
40	import weka.core.TechnicalInformation.Field;
41	import weka.core.TechnicalInformation.Type;
42	import weka.filters.Filter;
43	import weka.filters.unsupervised.attribute.Remove;
44
45	import java.util.BitSet;
46	import java.util.Enumeration;
47	import java.util.Random;
48	import java.util.Vector;
49
50	/**
51	<!-- globalinfo-start -->
52	* WrapperSubsetEval:<br/>
53	* <br/>
54	* Evaluates attribute sets by using a learning scheme. Cross validation is used to estimate the accuracy of the learning scheme for a set of attributes.<br/>
55	* <br/>
56	* For more information see:<br/>
57	* <br/>
58	* Ron Kohavi, George H. John (1997). Wrappers for feature subset selection. Artificial Intelligence. 97(1-2):273-324.
59	* <p/>
60	<!-- globalinfo-end -->
61	*
62	<!-- technical-bibtex-start -->
63	* BibTeX:
64	* <pre>
65	* @article{Kohavi1997,
66	* author = {Ron Kohavi and George H. John},
67	* journal = {Artificial Intelligence},
68	* note = {Special issue on relevance},
69	* number = {1-2},
70	* pages = {273-324},
71	* title = {Wrappers for feature subset selection},
72	* volume = {97},
73	* year = {1997},
74	* ISSN = {0004-3702}
75	* }
76	* </pre>
77	* <p/>
78	<!-- technical-bibtex-end -->
79	*
80	<!-- options-start -->
81	* Valid options are: <p/>
82	*
83	* <pre> -B <base learner>
84	* class name of base learner to use for accuracy estimation.
85	* Place any classifier options LAST on the command line
86	* following a "--". eg.:
87	* -B weka.classifiers.bayes.NaiveBayes ... -- -K
88	* (default: weka.classifiers.rules.ZeroR)</pre>
89	*
90	* <pre> -F <num>
91	* number of cross validation folds to use for estimating accuracy.
92	* (default=5)</pre>
93	*
94	* <pre> -R <seed>
95	* Seed for cross validation accuracy estimation.
96	* (default = 1)</pre>
97	*
98	* <pre> -T <num>
99	* threshold by which to execute another cross validation
100	* (standard deviation---expressed as a percentage of the mean).
101	* (default: 0.01 (1%))</pre>
102	*
103	* <pre> -E <acc \| rmse \| mae \| f-meas \| auc>
104	* Performance evaluation measure to use for selecting attributes.
105	* (Default = accuracy for discrete class and rmse for numeric class)</pre>
106	*
107	* <pre>
108	* Options specific to scheme weka.classifiers.rules.ZeroR:
109	* </pre>
110	*
111	* <pre> -D
112	* If set, classifier is run in debug mode and
113	* may output additional info to the console</pre>
114	*
115	<!-- options-end -->
116	*
117	* @author Mark Hall (mhall@cs.waikato.ac.nz)
118	* @version $Revision: 5928 $
119	*/
120	public class WrapperSubsetEval
121	extends ASEvaluation
122	implements SubsetEvaluator,
123	OptionHandler,
124	TechnicalInformationHandler {
125
126	/** for serialization */
127	static final long serialVersionUID = -4573057658746728675L;
128
129	/** training instances */
130	private Instances m_trainInstances;
131	/** class index */
132	private int m_classIndex;
133	/** number of attributes in the training data */
134	private int m_numAttribs;
135	/** number of instances in the training data */
136	private int m_numInstances;
137	/** holds an evaluation object */
138	private Evaluation m_Evaluation;
139	/** holds the base classifier object */
140	private Classifier m_BaseClassifier;
141	/** number of folds to use for cross validation */
142	private int m_folds;
143	/** random number seed */
144	private int m_seed;
145	/**
146	* the threshold by which to do further cross validations when
147	* estimating the accuracy of a subset
148	*/
149	private double m_threshold;
150
151	public static final int EVAL_DEFAULT = 1;
152	public static final int EVAL_ACCURACY = 2;
153	public static final int EVAL_RMSE = 3;
154	public static final int EVAL_MAE = 4;
155	public static final int EVAL_FMEASURE = 5;
156	public static final int EVAL_AUC = 6;
157
158	public static final Tag[] TAGS_EVALUATION = {
159	new Tag(EVAL_DEFAULT, "Default: accuracy (discrete class); RMSE (numeric class)"),
160	new Tag(EVAL_ACCURACY, "Accuracy (discrete class only)"),
161	new Tag(EVAL_RMSE, "RMSE (of the class probabilities for discrete class)"),
162	new Tag(EVAL_MAE, "MAE (of the class probabilities for discrete class)"),
163	new Tag(EVAL_FMEASURE, "F-measure (discrete class only)"),
164	new Tag(EVAL_AUC, "AUC (area under the ROC curve - discrete class only)")
165	};
166
167	/** The evaluation measure to use */
168	protected int m_evaluationMeasure = EVAL_DEFAULT;
169
170	/**
171	* Returns a string describing this attribute evaluator
172	* @return a description of the evaluator suitable for
173	* displaying in the explorer/experimenter gui
174	*/
175	public String globalInfo() {
176	return "WrapperSubsetEval:\n\n"
177	+"Evaluates attribute sets by using a learning scheme. Cross "
178	+"validation is used to estimate the accuracy of the learning "
179	+"scheme for a set of attributes.\n\n"
180	+ "For more information see:\n\n"
181	+ getTechnicalInformation().toString();
182	}
183
184	/**
185	* Returns an instance of a TechnicalInformation object, containing
186	* detailed information about the technical background of this class,
187	* e.g., paper reference or book this class is based on.
188	*
189	* @return the technical information about this class
190	*/
191	public TechnicalInformation getTechnicalInformation() {
192	TechnicalInformation result;
193
194	result = new TechnicalInformation(Type.ARTICLE);
195	result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John");
196	result.setValue(Field.YEAR, "1997");
197	result.setValue(Field.TITLE, "Wrappers for feature subset selection");
198	result.setValue(Field.JOURNAL, "Artificial Intelligence");
199	result.setValue(Field.VOLUME, "97");
200	result.setValue(Field.NUMBER, "1-2");
201	result.setValue(Field.PAGES, "273-324");
202	result.setValue(Field.NOTE, "Special issue on relevance");
203	result.setValue(Field.ISSN, "0004-3702");
204
205	return result;
206	}
207
/**
 * Constructor. Calls {@link #resetOptions()} so that every option starts
 * out with the value assigned there.
 */
public WrapperSubsetEval () {
  super();
  resetOptions();
}
214
215
216	/**
217	* Returns an enumeration describing the available options.
218	* @return an enumeration of all the available options.
219	**/
220	public Enumeration listOptions () {
221	Vector newVector = new Vector(4);
222	newVector.addElement(new Option(
223	"\tclass name of base learner to use for \taccuracy estimation.\n"
224	+ "\tPlace any classifier options LAST on the command line\n"
225	+ "\tfollowing a \"--\". eg.:\n"
226	+ "\t\t-B weka.classifiers.bayes.NaiveBayes ... -- -K\n"
227	+ "\t(default: weka.classifiers.rules.ZeroR)",
228	"B", 1, "-B <base learner>"));
229
230	newVector.addElement(new Option(
231	"\tnumber of cross validation folds to use for estimating accuracy.\n"
232	+ "\t(default=5)",
233	"F", 1, "-F <num>"));
234
235	newVector.addElement(new Option(
236	"\tSeed for cross validation accuracy testimation.\n"
237	+ "\t(default = 1)",
238	"R", 1,"-R <seed>"));
239
240	newVector.addElement(new Option(
241	"\tthreshold by which to execute another cross validation\n"
242	+ "\t(standard deviation---expressed as a percentage of the mean).\n"
243	+ "\t(default: 0.01 (1%))",
244	"T", 1, "-T <num>"));
245
246	newVector.addElement(new Option(
247	"\tPerformance evaluation measure to use for selecting attributes.\n" +
248	"\t(Default = accuracy for discrete class and rmse for numeric class)",
249	"E", 1, "-E <acc \| rmse \| mae \| f-meas \| auc>"));
250
251	if ((m_BaseClassifier != null) &&
252	(m_BaseClassifier instanceof OptionHandler)) {
253	newVector.addElement(new Option("", "", 0, "\nOptions specific to scheme "
254	+ m_BaseClassifier.getClass().getName()
255	+ ":"));
256	Enumeration enu = ((OptionHandler)m_BaseClassifier).listOptions();
257
258	while (enu.hasMoreElements()) {
259	newVector.addElement(enu.nextElement());
260	}
261	}
262
263	return newVector.elements();
264	}
265
266
267	/**
268	* Parses a given list of options. <p/>
269	*
270	<!-- options-start -->
271	* Valid options are: <p/>
272	*
273	* <pre> -B <base learner>
274	* class name of base learner to use for accuracy estimation.
275	* Place any classifier options LAST on the command line
276	* following a "--". eg.:
277	* -B weka.classifiers.bayes.NaiveBayes ... -- -K
278	* (default: weka.classifiers.rules.ZeroR)</pre>
279	*
280	* <pre> -F <num>
281	* number of cross validation folds to use for estimating accuracy.
282	* (default=5)</pre>
283	*
284	* <pre> -R <seed>
285	* Seed for cross validation accuracy estimation.
286	* (default = 1)</pre>
287	*
288	* <pre> -T <num>
289	* threshold by which to execute another cross validation
290	* (standard deviation---expressed as a percentage of the mean).
291	* (default: 0.01 (1%))</pre>
292	*
293	* <pre> -E <acc \| rmse \| mae \| f-meas \| auc>
294	* Performance evaluation measure to use for selecting attributes.
295	* (Default = accuracy for discrete class and rmse for numeric class)</pre>
296	*
297	* <pre>
298	* Options specific to scheme weka.classifiers.rules.ZeroR:
299	* </pre>
300	*
301	* <pre> -D
302	* If set, classifier is run in debug mode and
303	* may output additional info to the console</pre>
304	*
305	<!-- options-end -->
306	*
307	* @param options the list of options as an array of strings
308	* @throws Exception if an option is not supported
309	*/
310	public void setOptions (String[] options)
311	throws Exception {
312	String optionString;
313	resetOptions();
314	optionString = Utils.getOption('B', options);
315
316	if (optionString.length() == 0)
317	optionString = ZeroR.class.getName();
318	setClassifier(AbstractClassifier.forName(optionString,
319	Utils.partitionOptions(options)));
320	optionString = Utils.getOption('F', options);
321
322	if (optionString.length() != 0) {
323	setFolds(Integer.parseInt(optionString));
324	}
325
326	optionString = Utils.getOption('R', options);
327	if (optionString.length() != 0) {
328	setSeed(Integer.parseInt(optionString));
329	}
330
331	// optionString = Utils.getOption('S',options);
332	// if (optionString.length() != 0)
333	// {
334	// seed = Integer.parseInt(optionString);
335	// }
336	optionString = Utils.getOption('T', options);
337
338	if (optionString.length() != 0) {
339	Double temp;
340	temp = Double.valueOf(optionString);
341	setThreshold(temp.doubleValue());
342	}
343
344	optionString = Utils.getOption('E', options);
345	if (optionString.length() != 0) {
346	if (optionString.equals("acc")) {
347	setEvaluationMeasure(new SelectedTag(EVAL_ACCURACY, TAGS_EVALUATION));
348	} else if (optionString.equals("rmse")) {
349	setEvaluationMeasure(new SelectedTag(EVAL_RMSE, TAGS_EVALUATION));
350	} else if (optionString.equals("mae")) {
351	setEvaluationMeasure(new SelectedTag(EVAL_MAE, TAGS_EVALUATION));
352	} else if (optionString.equals("f-meas")) {
353	setEvaluationMeasure(new SelectedTag(EVAL_FMEASURE, TAGS_EVALUATION));
354	} else if (optionString.equals("auc")) {
355	setEvaluationMeasure(new SelectedTag(EVAL_AUC, TAGS_EVALUATION));
356	} else {
357	throw new IllegalArgumentException("Invalid evaluation measure");
358	}
359	}
360	}
361
362	/**
363	* Returns the tip text for this property
364	* @return tip text for this property suitable for
365	* displaying in the explorer/experimenter gui
366	*/
367	public String evaluationMeasureTipText() {
368	return "The measure used to evaluate the performance of attribute combinations.";
369	}
370	/**
371	* Gets the currently set performance evaluation measure used for selecting
372	* attributes for the decision table
373	*
374	* @return the performance evaluation measure
375	*/
376	public SelectedTag getEvaluationMeasure() {
377	return new SelectedTag(m_evaluationMeasure, TAGS_EVALUATION);
378	}
379
380	/**
381	* Sets the performance evaluation measure to use for selecting attributes
382	* for the decision table
383	*
384	* @param newMethod the new performance evaluation metric to use
385	*/
386	public void setEvaluationMeasure(SelectedTag newMethod) {
387	if (newMethod.getTags() == TAGS_EVALUATION) {
388	m_evaluationMeasure = newMethod.getSelectedTag().getID();
389	}
390	}
391
392	/**
393	* Returns the tip text for this property
394	* @return tip text for this property suitable for
395	* displaying in the explorer/experimenter gui
396	*/
397	public String thresholdTipText() {
398	return "Repeat xval if stdev of mean exceeds this value.";
399	}
400
401	/**
402	* Set the value of the threshold for repeating cross validation
403	*
404	* @param t the value of the threshold
405	*/
406	public void setThreshold (double t) {
407	m_threshold = t;
408	}
409
410
411	/**
412	* Get the value of the threshold
413	*
414	* @return the threshold as a double
415	*/
416	public double getThreshold () {
417	return m_threshold;
418	}
419
420	/**
421	* Returns the tip text for this property
422	* @return tip text for this property suitable for
423	* displaying in the explorer/experimenter gui
424	*/
425	public String foldsTipText() {
426	return "Number of xval folds to use when estimating subset accuracy.";
427	}
428
429	/**
430	* Set the number of folds to use for accuracy estimation
431	*
432	* @param f the number of folds
433	*/
434	public void setFolds (int f) {
435	m_folds = f;
436	}
437
438
439	/**
440	* Get the number of folds used for accuracy estimation
441	*
442	* @return the number of folds
443	*/
444	public int getFolds () {
445	return m_folds;
446	}
447
448	/**
449	* Returns the tip text for this property
450	* @return tip text for this property suitable for
451	* displaying in the explorer/experimenter gui
452	*/
453	public String seedTipText() {
454	return "Seed to use for randomly generating xval splits.";
455	}
456
457	/**
458	* Set the seed to use for cross validation
459	*
460	* @param s the seed
461	*/
462	public void setSeed (int s) {
463	m_seed = s;
464	}
465
466
467	/**
468	* Get the random number seed used for cross validation
469	*
470	* @return the seed
471	*/
472	public int getSeed () {
473	return m_seed;
474	}
475
476	/**
477	* Returns the tip text for this property
478	* @return tip text for this property suitable for
479	* displaying in the explorer/experimenter gui
480	*/
481	public String classifierTipText() {
482	return "Classifier to use for estimating the accuracy of subsets";
483	}
484
485	/**
486	* Set the classifier to use for accuracy estimation
487	*
488	* @param newClassifier the Classifier to use.
489	*/
490	public void setClassifier (Classifier newClassifier) {
491	m_BaseClassifier = newClassifier;
492	}
493
494
495	/**
496	* Get the classifier used as the base learner.
497	*
498	* @return the classifier used as the classifier
499	*/
500	public Classifier getClassifier () {
501	return m_BaseClassifier;
502	}
503
504
505	/**
506	* Gets the current settings of WrapperSubsetEval.
507	*
508	* @return an array of strings suitable for passing to setOptions()
509	*/
510	public String[] getOptions () {
511	String[] classifierOptions = new String[0];
512
513	if ((m_BaseClassifier != null) &&
514	(m_BaseClassifier instanceof OptionHandler)) {
515	classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();
516	}
517
518	String[] options = new String[9 + classifierOptions.length];
519	int current = 0;
520
521	if (getClassifier() != null) {
522	options[current++] = "-B";
523	options[current++] = getClassifier().getClass().getName();
524	}
525
526	options[current++] = "-F";
527	options[current++] = "" + getFolds();
528	options[current++] = "-T";
529	options[current++] = "" + getThreshold();
530	options[current++] = "-R";
531	options[current++] = "" + getSeed();
532	options[current++] = "--";
533	System.arraycopy(classifierOptions, 0, options, current,
534	classifierOptions.length);
535	current += classifierOptions.length;
536
537	while (current < options.length) {
538	options[current++] = "";
539	}
540
541	return options;
542	}
543
544
545	protected void resetOptions () {
546	m_trainInstances = null;
547	m_Evaluation = null;
548	m_BaseClassifier = new ZeroR();
549	m_folds = 5;
550	m_seed = 1;
551	m_threshold = 0.01;
552	}
553
554	/**
555	* Returns the capabilities of this evaluator.
556	*
557	* @return the capabilities of this evaluator
558	* @see Capabilities
559	*/
560	public Capabilities getCapabilities() {
561	Capabilities result;
562
563	if (getClassifier() == null) {
564	result = super.getCapabilities();
565	result.disableAll();
566	} else {
567	result = getClassifier().getCapabilities();
568	}
569
570	// set dependencies
571	for (Capability cap: Capability.values())
572	result.enableDependency(cap);
573
574	// adjustment for class based on selected evaluation metric
575	result.disable(Capability.NUMERIC_CLASS);
576	result.disable(Capability.DATE_CLASS);
577	if (m_evaluationMeasure != EVAL_ACCURACY && m_evaluationMeasure != EVAL_FMEASURE &&
578	m_evaluationMeasure != EVAL_AUC) {
579	result.enable(Capability.NUMERIC_CLASS);
580	result.enable(Capability.DATE_CLASS);
581	}
582
583	result.setMinimumNumberInstances(getFolds());
584
585	return result;
586	}
587
588	/**
589	* Generates a attribute evaluator. Has to initialize all fields of the
590	* evaluator that are not being set via options.
591	*
592	* @param data set of instances serving as training data
593	* @throws Exception if the evaluator has not been
594	* generated successfully
595	*/
596	public void buildEvaluator (Instances data)
597	throws Exception {
598
599	// can evaluator handle data?
600	getCapabilities().testWithFail(data);
601
602	m_trainInstances = data;
603	m_classIndex = m_trainInstances.classIndex();
604	m_numAttribs = m_trainInstances.numAttributes();
605	m_numInstances = m_trainInstances.numInstances();
606	}
607
608
609	/**
610	* Evaluates a subset of attributes
611	*
612	* @param subset a bitset representing the attribute subset to be
613	* evaluated
614	* @return the error rate
615	* @throws Exception if the subset could not be evaluated
616	*/
617	public double evaluateSubset (BitSet subset)
618	throws Exception {
619	double evalMetric = 0;
620	double[] repError = new double[5];
621	int numAttributes = 0;
622	int i, j;
623	Random Rnd = new Random(m_seed);
624	Remove delTransform = new Remove();
625	delTransform.setInvertSelection(true);
626	// copy the instances
627	Instances trainCopy = new Instances(m_trainInstances);
628
629	// count attributes set in the BitSet
630	for (i = 0; i < m_numAttribs; i++) {
631	if (subset.get(i)) {
632	numAttributes++;
633	}
634	}
635
636	// set up an array of attribute indexes for the filter (+1 for the class)
637	int[] featArray = new int[numAttributes + 1];
638
639	for (i = 0, j = 0; i < m_numAttribs; i++) {
640	if (subset.get(i)) {
641	featArray[j++] = i;
642	}
643	}
644
645	featArray[j] = m_classIndex;
646	delTransform.setAttributeIndicesArray(featArray);
647	delTransform.setInputFormat(trainCopy);
648	trainCopy = Filter.useFilter(trainCopy, delTransform);
649
650	// max of 5 repetitions of cross validation
651	for (i = 0; i < 5; i++) {
652	m_Evaluation = new Evaluation(trainCopy);
653	m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd);
654
655	switch (m_evaluationMeasure) {
656	case EVAL_DEFAULT:
657	repError[i] = m_Evaluation.errorRate();
658	break;
659	case EVAL_ACCURACY:
660	repError[i] = m_Evaluation.errorRate();
661	break;
662	case EVAL_RMSE:
663	repError[i] = m_Evaluation.rootMeanSquaredError();
664	break;
665	case EVAL_MAE:
666	repError[i] = m_Evaluation.meanAbsoluteError();
667	break;
668	case EVAL_FMEASURE:
669	repError[i] = m_Evaluation.weightedFMeasure();
670	break;
671	case EVAL_AUC:
672	repError[i] = m_Evaluation.weightedAreaUnderROC();
673	break;
674	}
675
676	// check on the standard deviation
677	if (!repeat(repError, i + 1)) {
678	i++;
679	break;
680	}
681	}
682
683	for (j = 0; j < i; j++) {
684	evalMetric += repError[j];
685	}
686
687	evalMetric /= (double)i;
688	m_Evaluation = null;
689
690	switch (m_evaluationMeasure) {
691	case EVAL_DEFAULT:
692	case EVAL_ACCURACY:
693	case EVAL_RMSE:
694	case EVAL_MAE:
695	evalMetric = -evalMetric; // maximize
696	break;
697	}
698
699	return evalMetric;
700	}
701
702
703	/**
704	* Returns a string describing the wrapper
705	*
706	* @return the description as a string
707	*/
708	public String toString () {
709	StringBuffer text = new StringBuffer();
710
711	if (m_trainInstances == null) {
712	text.append("\tWrapper subset evaluator has not been built yet\n");
713	}
714	else {
715	text.append("\tWrapper Subset Evaluator\n");
716	text.append("\tLearning scheme: "
717	+ getClassifier().getClass().getName() + "\n");
718	text.append("\tScheme options: ");
719	String[] classifierOptions = new String[0];
720
721	if (m_BaseClassifier instanceof OptionHandler) {
722	classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();
723
724	for (int i = 0; i < classifierOptions.length; i++) {
725	text.append(classifierOptions[i] + " ");
726	}
727	}
728
729	text.append("\n");
730	switch (m_evaluationMeasure) {
731	case EVAL_DEFAULT:
732	case EVAL_ACCURACY:
733	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
734	text.append("\tSubset evaluation: RMSE\n");
735	} else {
736	text.append("\tSubset evaluation: classification error\n");
737	}
738	break;
739	case EVAL_RMSE:
740	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
741	text.append("\tSubset evaluation: RMSE\n");
742	} else {
743	text.append("\tSubset evaluation: RMSE (probability estimates)\n");
744	}
745	break;
746	case EVAL_MAE:
747	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
748	text.append("\tSubset evaluation: MAE\n");
749	} else {
750	text.append("\tSubset evaluation: MAE (probability estimates)\n");
751	}
752	break;
753	case EVAL_FMEASURE:
754	text.append("\tSubset evaluation: F-measure\n");
755	break;
756	case EVAL_AUC:
757	text.append("\tSubset evaluation: area under the ROC curve\n");
758	break;
759	}
760
761	text.append("\tNumber of folds for accuracy estimation: "
762	+ m_folds
763	+ "\n");
764	}
765
766	return text.toString();
767	}
768
769
770	/**
771	* decides whether to do another repeat of cross validation. If the
772	* standard deviation of the cross validations
773	* is greater than threshold% of the mean (default 1%) then another
774	* repeat is done.
775	*
776	* @param repError an array of cross validation results
777	* @param entries the number of cross validations done so far
778	* @return true if another cv is to be done
779	*/
780	private boolean repeat (double[] repError, int entries) {
781	int i;
782	double mean = 0;
783	double variance = 0;
784
785	if (entries == 1) {
786	return true;
787	}
788
789	for (i = 0; i < entries; i++) {
790	mean += repError[i];
791	}
792
793	mean /= (double)entries;
794
795	for (i = 0; i < entries; i++) {
796	variance += ((repError[i] - mean)*(repError[i] - mean));
797	}
798
799	variance /= (double)entries;
800
801	if (variance > 0) {
802	variance = Math.sqrt(variance);
803	}
804
805	if ((variance/mean) > m_threshold) {
806	return true;
807	}
808
809	return false;
810	}
811
812	/**
813	* Returns the revision string.
814	*
815	* @return the revision
816	*/
817	public String getRevision() {
818	return RevisionUtils.extract("$Revision: 5928 $");
819	}
820
821	/**
822	* Main method for testing this class.
823	*
824	* @param args the options
825	*/
826	public static void main (String[] args) {
827	runEvaluator(new WrapperSubsetEval(), args);
828	}
829	}
830

Note: See TracBrowser for help on using the repository browser.

Download in other formats: