Context Navigation

WrapperSubsetEval.java

Last change on this file was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 23.8 KB

Rev	Line
[29]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* WrapperSubsetEval.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.attributeSelection;
	24
	25	import weka.classifiers.Classifier;
	26	import weka.classifiers.AbstractClassifier;
	27	import weka.classifiers.Evaluation;
	28	import weka.classifiers.rules.ZeroR;
	29	import weka.core.Capabilities;
	30	import weka.core.Instances;
	31	import weka.core.Option;
	32	import weka.core.OptionHandler;
	33	import weka.core.RevisionUtils;
	34	import weka.core.SelectedTag;
	35	import weka.core.Tag;
	36	import weka.core.TechnicalInformation;
	37	import weka.core.TechnicalInformationHandler;
	38	import weka.core.Utils;
	39	import weka.core.Capabilities.Capability;
	40	import weka.core.TechnicalInformation.Field;
	41	import weka.core.TechnicalInformation.Type;
	42	import weka.filters.Filter;
	43	import weka.filters.unsupervised.attribute.Remove;
	44
	45	import java.util.BitSet;
	46	import java.util.Enumeration;
	47	import java.util.Random;
	48	import java.util.Vector;
	49
	50	/**
	51	<!-- globalinfo-start -->
	52	* WrapperSubsetEval:<br/>
	53	* <br/>
	54	* Evaluates attribute sets by using a learning scheme. Cross validation is used to estimate the accuracy of the learning scheme for a set of attributes.<br/>
	55	* <br/>
	56	* For more information see:<br/>
	57	* <br/>
	58	* Ron Kohavi, George H. John (1997). Wrappers for feature subset selection. Artificial Intelligence. 97(1-2):273-324.
	59	* <p/>
	60	<!-- globalinfo-end -->
	61	*
	62	<!-- technical-bibtex-start -->
	63	* BibTeX:
	64	* <pre>
	65	* @article{Kohavi1997,
	66	* author = {Ron Kohavi and George H. John},
	67	* journal = {Artificial Intelligence},
	68	* note = {Special issue on relevance},
	69	* number = {1-2},
	70	* pages = {273-324},
	71	* title = {Wrappers for feature subset selection},
	72	* volume = {97},
	73	* year = {1997},
	74	* ISSN = {0004-3702}
	75	* }
	76	* </pre>
	77	* <p/>
	78	<!-- technical-bibtex-end -->
	79	*
	80	<!-- options-start -->
	81	* Valid options are: <p/>
	82	*
	83	* <pre> -B <base learner>
	84	* class name of base learner to use for accuracy estimation.
	85	* Place any classifier options LAST on the command line
	86	* following a "--". eg.:
	87	* -B weka.classifiers.bayes.NaiveBayes ... -- -K
	88	* (default: weka.classifiers.rules.ZeroR)</pre>
	89	*
	90	* <pre> -F <num>
	91	* number of cross validation folds to use for estimating accuracy.
	92	* (default=5)</pre>
	93	*
	94	* <pre> -R <seed>
	95	* Seed for cross validation accuracy estimation.
	96	* (default = 1)</pre>
	97	*
	98	* <pre> -T <num>
	99	* threshold by which to execute another cross validation
	100	* (standard deviation---expressed as a percentage of the mean).
	101	* (default: 0.01 (1%))</pre>
	102	*
	103	* <pre> -E <acc | rmse | mae | f-meas | auc>
	104	* Performance evaluation measure to use for selecting attributes.
	105	* (Default = accuracy for discrete class and rmse for numeric class)</pre>
	106	*
	107	* <pre>
	108	* Options specific to scheme weka.classifiers.rules.ZeroR:
	109	* </pre>
	110	*
	111	* <pre> -D
	112	* If set, classifier is run in debug mode and
	113	* may output additional info to the console</pre>
	114	*
	115	<!-- options-end -->
	116	*
	117	* @author Mark Hall (mhall@cs.waikato.ac.nz)
	118	* @version $Revision: 5928 $
	119	*/
	120	public class WrapperSubsetEval
	121	extends ASEvaluation
	122	implements SubsetEvaluator,
	123	OptionHandler,
	124	TechnicalInformationHandler {
	125
	126	/** for serialization */
	127	static final long serialVersionUID = -4573057658746728675L;
	128
	129	/** training instances; null until buildEvaluator() has stored a data set */
	130	private Instances m_trainInstances;
	131	/** class index of the training instances */
	132	private int m_classIndex;
	133	/** number of attributes in the training data */
	134	private int m_numAttribs;
	135	/** number of instances in the training data */
	136	private int m_numInstances;
	137	/** scratch Evaluation object used while evaluateSubset() runs; set back to null afterwards */
	138	private Evaluation m_Evaluation;
	139	/** holds the base classifier object (ZeroR after resetOptions()) */
	140	private Classifier m_BaseClassifier;
	141	/** number of folds to use for cross validation */
	142	private int m_folds;
	143	/** seed for the random number generator handed to cross validation */
	144	private int m_seed;
	145	/**
	146	* the threshold by which to do further cross validations when
	147	* estimating the accuracy of a subset (compared against stdev/mean in repeat())
	148	*/
	149	private double m_threshold;
	150
	151	public static final int EVAL_DEFAULT = 1; // tag ID: accuracy (discrete class) or RMSE (numeric class)
	152	public static final int EVAL_ACCURACY = 2; // tag ID: accuracy
	153	public static final int EVAL_RMSE = 3; // tag ID: root mean squared error
	154	public static final int EVAL_MAE = 4; // tag ID: mean absolute error
	155	public static final int EVAL_FMEASURE = 5; // tag ID: F-measure
	156	public static final int EVAL_AUC = 6; // tag ID: area under the ROC curve
	157
	158	public static final Tag[] TAGS_EVALUATION = { // selectable measures; setEvaluationMeasure() only accepts tags from this array
	159	new Tag(EVAL_DEFAULT, "Default: accuracy (discrete class); RMSE (numeric class)"),
	160	new Tag(EVAL_ACCURACY, "Accuracy (discrete class only)"),
	161	new Tag(EVAL_RMSE, "RMSE (of the class probabilities for discrete class)"),
	162	new Tag(EVAL_MAE, "MAE (of the class probabilities for discrete class)"),
	163	new Tag(EVAL_FMEASURE, "F-measure (discrete class only)"),
	164	new Tag(EVAL_AUC, "AUC (area under the ROC curve - discrete class only)")
	165	};
	166
	167	/** The evaluation measure to use (one of the EVAL_* IDs) */
	168	protected int m_evaluationMeasure = EVAL_DEFAULT;
	169
	170	/**
	171	* Returns a string describing this attribute evaluator
	172	* @return a description of the evaluator suitable for
	173	* displaying in the explorer/experimenter gui
	174	*/
	175	public String globalInfo() {
	176	return "WrapperSubsetEval:\n\n"
	177	+"Evaluates attribute sets by using a learning scheme. Cross "
	178	+"validation is used to estimate the accuracy of the learning "
	179	+"scheme for a set of attributes.\n\n"
	180	+ "For more information see:\n\n"
	181	+ getTechnicalInformation().toString();
	182	}
	183
	184	/**
	185	* Returns an instance of a TechnicalInformation object, containing
	186	* detailed information about the technical background of this class,
	187	* e.g., paper reference or book this class is based on.
	188	*
	189	* @return the technical information about this class
	190	*/
	191	public TechnicalInformation getTechnicalInformation() {
	192	TechnicalInformation result;
	193
	194	result = new TechnicalInformation(Type.ARTICLE);
	195	result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John");
	196	result.setValue(Field.YEAR, "1997");
	197	result.setValue(Field.TITLE, "Wrappers for feature subset selection");
	198	result.setValue(Field.JOURNAL, "Artificial Intelligence");
	199	result.setValue(Field.VOLUME, "97");
	200	result.setValue(Field.NUMBER, "1-2");
	201	result.setValue(Field.PAGES, "273-324");
	202	result.setValue(Field.NOTE, "Special issue on relevance");
	203	result.setValue(Field.ISSN, "0004-3702");
	204
	205	return result;
	206	}
	207
	208	/**
	209	* Constructor. Calls restOptions to set default options
	210	**/
	211	public WrapperSubsetEval () {
	212	resetOptions();
	213	}
	214
	215
	216	/**
	217	* Returns an enumeration describing the available options.
	218	* @return an enumeration of all the available options.
	219	**/
	220	public Enumeration listOptions () {
	221	Vector newVector = new Vector(4);
	222	newVector.addElement(new Option(
	223	"\tclass name of base learner to use for \taccuracy estimation.\n"
	224	+ "\tPlace any classifier options LAST on the command line\n"
	225	+ "\tfollowing a \"--\". eg.:\n"
	226	+ "\t\t-B weka.classifiers.bayes.NaiveBayes ... -- -K\n"
	227	+ "\t(default: weka.classifiers.rules.ZeroR)",
	228	"B", 1, "-B <base learner>"));
	229
	230	newVector.addElement(new Option(
	231	"\tnumber of cross validation folds to use for estimating accuracy.\n"
	232	+ "\t(default=5)",
	233	"F", 1, "-F <num>"));
	234
	235	newVector.addElement(new Option(
	236	"\tSeed for cross validation accuracy testimation.\n"
	237	+ "\t(default = 1)",
	238	"R", 1,"-R <seed>"));
	239
	240	newVector.addElement(new Option(
	241	"\tthreshold by which to execute another cross validation\n"
	242	+ "\t(standard deviation---expressed as a percentage of the mean).\n"
	243	+ "\t(default: 0.01 (1%))",
	244	"T", 1, "-T <num>"));
	245
	246	newVector.addElement(new Option(
	247	"\tPerformance evaluation measure to use for selecting attributes.\n" +
	248	"\t(Default = accuracy for discrete class and rmse for numeric class)",
	249	"E", 1, "-E <acc \| rmse \| mae \| f-meas \| auc>"));
	250
	251	if ((m_BaseClassifier != null) &&
	252	(m_BaseClassifier instanceof OptionHandler)) {
	253	newVector.addElement(new Option("", "", 0, "\nOptions specific to scheme "
	254	+ m_BaseClassifier.getClass().getName()
	255	+ ":"));
	256	Enumeration enu = ((OptionHandler)m_BaseClassifier).listOptions();
	257
	258	while (enu.hasMoreElements()) {
	259	newVector.addElement(enu.nextElement());
	260	}
	261	}
	262
	263	return newVector.elements();
	264	}
	265
	266
	267	/**
	268	* Parses a given list of options. <p/>
	269	*
	270	<!-- options-start -->
	271	* Valid options are: <p/>
	272	*
	273	* <pre> -B <base learner>
	274	* class name of base learner to use for accuracy estimation.
	275	* Place any classifier options LAST on the command line
	276	* following a "--". eg.:
	277	* -B weka.classifiers.bayes.NaiveBayes ... -- -K
	278	* (default: weka.classifiers.rules.ZeroR)</pre>
	279	*
	280	* <pre> -F <num>
	281	* number of cross validation folds to use for estimating accuracy.
	282	* (default=5)</pre>
	283	*
	284	* <pre> -R <seed>
	285	* Seed for cross validation accuracy estimation.
	286	* (default = 1)</pre>
	287	*
	288	* <pre> -T <num>
	289	* threshold by which to execute another cross validation
	290	* (standard deviation---expressed as a percentage of the mean).
	291	* (default: 0.01 (1%))</pre>
	292	*
	293	* <pre> -E <acc | rmse | mae | f-meas | auc>
	294	* Performance evaluation measure to use for selecting attributes.
	295	* (Default = accuracy for discrete class and rmse for numeric class)</pre>
	296	*
	297	* <pre>
	298	* Options specific to scheme weka.classifiers.rules.ZeroR:
	299	* </pre>
	300	*
	301	* <pre> -D
	302	* If set, classifier is run in debug mode and
	303	* may output additional info to the console</pre>
	304	*
	305	<!-- options-end -->
	306	*
	307	* @param options the list of options as an array of strings
	308	* @throws Exception if an option is not supported
	309	*/
	310	public void setOptions (String[] options)
	311	throws Exception {
	312	String optionString;
	313	resetOptions();
	314	optionString = Utils.getOption('B', options);
	315
	316	if (optionString.length() == 0)
	317	optionString = ZeroR.class.getName();
	318	setClassifier(AbstractClassifier.forName(optionString,
	319	Utils.partitionOptions(options)));
	320	optionString = Utils.getOption('F', options);
	321
	322	if (optionString.length() != 0) {
	323	setFolds(Integer.parseInt(optionString));
	324	}
	325
	326	optionString = Utils.getOption('R', options);
	327	if (optionString.length() != 0) {
	328	setSeed(Integer.parseInt(optionString));
	329	}
	330
	331	// optionString = Utils.getOption('S',options);
	332	// if (optionString.length() != 0)
	333	// {
	334	// seed = Integer.parseInt(optionString);
	335	// }
	336	optionString = Utils.getOption('T', options);
	337
	338	if (optionString.length() != 0) {
	339	Double temp;
	340	temp = Double.valueOf(optionString);
	341	setThreshold(temp.doubleValue());
	342	}
	343
	344	optionString = Utils.getOption('E', options);
	345	if (optionString.length() != 0) {
	346	if (optionString.equals("acc")) {
	347	setEvaluationMeasure(new SelectedTag(EVAL_ACCURACY, TAGS_EVALUATION));
	348	} else if (optionString.equals("rmse")) {
	349	setEvaluationMeasure(new SelectedTag(EVAL_RMSE, TAGS_EVALUATION));
	350	} else if (optionString.equals("mae")) {
	351	setEvaluationMeasure(new SelectedTag(EVAL_MAE, TAGS_EVALUATION));
	352	} else if (optionString.equals("f-meas")) {
	353	setEvaluationMeasure(new SelectedTag(EVAL_FMEASURE, TAGS_EVALUATION));
	354	} else if (optionString.equals("auc")) {
	355	setEvaluationMeasure(new SelectedTag(EVAL_AUC, TAGS_EVALUATION));
	356	} else {
	357	throw new IllegalArgumentException("Invalid evaluation measure");
	358	}
	359	}
	360	}
	361
	362	/**
	363	* Returns the tip text for this property
	364	* @return tip text for this property suitable for
	365	* displaying in the explorer/experimenter gui
	366	*/
	367	public String evaluationMeasureTipText() {
	368	return "The measure used to evaluate the performance of attribute combinations.";
	369	}
	370	/**
	371	* Gets the currently set performance evaluation measure used for selecting
	372	* attributes for the decision table
	373	*
	374	* @return the performance evaluation measure
	375	*/
	376	public SelectedTag getEvaluationMeasure() {
	377	return new SelectedTag(m_evaluationMeasure, TAGS_EVALUATION);
	378	}
	379
	380	/**
	381	* Sets the performance evaluation measure to use for selecting attributes
	382	* for the decision table
	383	*
	384	* @param newMethod the new performance evaluation metric to use
	385	*/
	386	public void setEvaluationMeasure(SelectedTag newMethod) {
	387	if (newMethod.getTags() == TAGS_EVALUATION) {
	388	m_evaluationMeasure = newMethod.getSelectedTag().getID();
	389	}
	390	}
	391
	392	/**
	393	* Returns the tip text for this property
	394	* @return tip text for this property suitable for
	395	* displaying in the explorer/experimenter gui
	396	*/
	397	public String thresholdTipText() {
	398	return "Repeat xval if stdev of mean exceeds this value.";
	399	}
	400
	401	/**
	402	* Set the value of the threshold for repeating cross validation
	403	*
	404	* @param t the value of the threshold
	405	*/
	406	public void setThreshold (double t) {
	407	m_threshold = t;
	408	}
	409
	410
	411	/**
	412	* Get the value of the threshold
	413	*
	414	* @return the threshold as a double
	415	*/
	416	public double getThreshold () {
	417	return m_threshold;
	418	}
	419
	420	/**
	421	* Returns the tip text for this property
	422	* @return tip text for this property suitable for
	423	* displaying in the explorer/experimenter gui
	424	*/
	425	public String foldsTipText() {
	426	return "Number of xval folds to use when estimating subset accuracy.";
	427	}
	428
	429	/**
	430	* Set the number of folds to use for accuracy estimation
	431	*
	432	* @param f the number of folds
	433	*/
	434	public void setFolds (int f) {
	435	m_folds = f;
	436	}
	437
	438
	439	/**
	440	* Get the number of folds used for accuracy estimation
	441	*
	442	* @return the number of folds
	443	*/
	444	public int getFolds () {
	445	return m_folds;
	446	}
	447
	448	/**
	449	* Returns the tip text for this property
	450	* @return tip text for this property suitable for
	451	* displaying in the explorer/experimenter gui
	452	*/
	453	public String seedTipText() {
	454	return "Seed to use for randomly generating xval splits.";
	455	}
	456
	457	/**
	458	* Set the seed to use for cross validation
	459	*
	460	* @param s the seed
	461	*/
	462	public void setSeed (int s) {
	463	m_seed = s;
	464	}
	465
	466
	467	/**
	468	* Get the random number seed used for cross validation
	469	*
	470	* @return the seed
	471	*/
	472	public int getSeed () {
	473	return m_seed;
	474	}
	475
	476	/**
	477	* Returns the tip text for this property
	478	* @return tip text for this property suitable for
	479	* displaying in the explorer/experimenter gui
	480	*/
	481	public String classifierTipText() {
	482	return "Classifier to use for estimating the accuracy of subsets";
	483	}
	484
	485	/**
	486	* Set the classifier to use for accuracy estimation
	487	*
	488	* @param newClassifier the Classifier to use.
	489	*/
	490	public void setClassifier (Classifier newClassifier) {
	491	m_BaseClassifier = newClassifier;
	492	}
	493
	494
	495	/**
	496	* Get the classifier used as the base learner.
	497	*
	498	* @return the classifier used as the classifier
	499	*/
	500	public Classifier getClassifier () {
	501	return m_BaseClassifier;
	502	}
	503
	504
	505	/**
	506	* Gets the current settings of WrapperSubsetEval.
	507	*
	508	* @return an array of strings suitable for passing to setOptions()
	509	*/
	510	public String[] getOptions () {
	511	String[] classifierOptions = new String[0];
	512
	513	if ((m_BaseClassifier != null) &&
	514	(m_BaseClassifier instanceof OptionHandler)) {
	515	classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();
	516	}
	517
	518	String[] options = new String[9 + classifierOptions.length];
	519	int current = 0;
	520
	521	if (getClassifier() != null) {
	522	options[current++] = "-B";
	523	options[current++] = getClassifier().getClass().getName();
	524	}
	525
	526	options[current++] = "-F";
	527	options[current++] = "" + getFolds();
	528	options[current++] = "-T";
	529	options[current++] = "" + getThreshold();
	530	options[current++] = "-R";
	531	options[current++] = "" + getSeed();
	532	options[current++] = "--";
	533	System.arraycopy(classifierOptions, 0, options, current,
	534	classifierOptions.length);
	535	current += classifierOptions.length;
	536
	537	while (current < options.length) {
	538	options[current++] = "";
	539	}
	540
	541	return options;
	542	}
	543
	544
	545	protected void resetOptions () {
	546	m_trainInstances = null;
	547	m_Evaluation = null;
	548	m_BaseClassifier = new ZeroR();
	549	m_folds = 5;
	550	m_seed = 1;
	551	m_threshold = 0.01;
	552	}
	553
	554	/**
	555	* Returns the capabilities of this evaluator.
	556	*
	557	* @return the capabilities of this evaluator
	558	* @see Capabilities
	559	*/
	560	public Capabilities getCapabilities() {
	561	Capabilities result;
	562
	563	if (getClassifier() == null) {
	564	result = super.getCapabilities();
	565	result.disableAll();
	566	} else {
	567	result = getClassifier().getCapabilities();
	568	}
	569
	570	// set dependencies
	571	for (Capability cap: Capability.values())
	572	result.enableDependency(cap);
	573
	574	// adjustment for class based on selected evaluation metric
	575	result.disable(Capability.NUMERIC_CLASS);
	576	result.disable(Capability.DATE_CLASS);
	577	if (m_evaluationMeasure != EVAL_ACCURACY && m_evaluationMeasure != EVAL_FMEASURE &&
	578	m_evaluationMeasure != EVAL_AUC) {
	579	result.enable(Capability.NUMERIC_CLASS);
	580	result.enable(Capability.DATE_CLASS);
	581	}
	582
	583	result.setMinimumNumberInstances(getFolds());
	584
	585	return result;
	586	}
	587
	588	/**
	589	* Generates a attribute evaluator. Has to initialize all fields of the
	590	* evaluator that are not being set via options.
	591	*
	592	* @param data set of instances serving as training data
	593	* @throws Exception if the evaluator has not been
	594	* generated successfully
	595	*/
	596	public void buildEvaluator (Instances data)
	597	throws Exception {
	598
	599	// can evaluator handle data?
	600	getCapabilities().testWithFail(data);
	601
	602	m_trainInstances = data;
	603	m_classIndex = m_trainInstances.classIndex();
	604	m_numAttribs = m_trainInstances.numAttributes();
	605	m_numInstances = m_trainInstances.numInstances();
	606	}
	607
	608
	609	/**
	610	* Evaluates a subset of attributes
	611	*
	612	* @param subset a bitset representing the attribute subset to be
	613	* evaluated
	614	* @return the error rate
	615	* @throws Exception if the subset could not be evaluated
	616	*/
	617	public double evaluateSubset (BitSet subset)
	618	throws Exception {
	619	double evalMetric = 0;
	620	double[] repError = new double[5];
	621	int numAttributes = 0;
	622	int i, j;
	623	Random Rnd = new Random(m_seed);
	624	Remove delTransform = new Remove();
	625	delTransform.setInvertSelection(true);
	626	// copy the instances
	627	Instances trainCopy = new Instances(m_trainInstances);
	628
	629	// count attributes set in the BitSet
	630	for (i = 0; i < m_numAttribs; i++) {
	631	if (subset.get(i)) {
	632	numAttributes++;
	633	}
	634	}
	635
	636	// set up an array of attribute indexes for the filter (+1 for the class)
	637	int[] featArray = new int[numAttributes + 1];
	638
	639	for (i = 0, j = 0; i < m_numAttribs; i++) {
	640	if (subset.get(i)) {
	641	featArray[j++] = i;
	642	}
	643	}
	644
	645	featArray[j] = m_classIndex;
	646	delTransform.setAttributeIndicesArray(featArray);
	647	delTransform.setInputFormat(trainCopy);
	648	trainCopy = Filter.useFilter(trainCopy, delTransform);
	649
	650	// max of 5 repetitions of cross validation
	651	for (i = 0; i < 5; i++) {
	652	m_Evaluation = new Evaluation(trainCopy);
	653	m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd);
	654
	655	switch (m_evaluationMeasure) {
	656	case EVAL_DEFAULT:
	657	repError[i] = m_Evaluation.errorRate();
	658	break;
	659	case EVAL_ACCURACY:
	660	repError[i] = m_Evaluation.errorRate();
	661	break;
	662	case EVAL_RMSE:
	663	repError[i] = m_Evaluation.rootMeanSquaredError();
	664	break;
	665	case EVAL_MAE:
	666	repError[i] = m_Evaluation.meanAbsoluteError();
	667	break;
	668	case EVAL_FMEASURE:
	669	repError[i] = m_Evaluation.weightedFMeasure();
	670	break;
	671	case EVAL_AUC:
	672	repError[i] = m_Evaluation.weightedAreaUnderROC();
	673	break;
	674	}
	675
	676	// check on the standard deviation
	677	if (!repeat(repError, i + 1)) {
	678	i++;
	679	break;
	680	}
	681	}
	682
	683	for (j = 0; j < i; j++) {
	684	evalMetric += repError[j];
	685	}
	686
	687	evalMetric /= (double)i;
	688	m_Evaluation = null;
	689
	690	switch (m_evaluationMeasure) {
	691	case EVAL_DEFAULT:
	692	case EVAL_ACCURACY:
	693	case EVAL_RMSE:
	694	case EVAL_MAE:
	695	evalMetric = -evalMetric; // maximize
	696	break;
	697	}
	698
	699	return evalMetric;
	700	}
	701
	702
	703	/**
	704	* Returns a string describing the wrapper
	705	*
	706	* @return the description as a string
	707	*/
	708	public String toString () {
	709	StringBuffer text = new StringBuffer();
	710
	711	if (m_trainInstances == null) {
	712	text.append("\tWrapper subset evaluator has not been built yet\n");
	713	}
	714	else {
	715	text.append("\tWrapper Subset Evaluator\n");
	716	text.append("\tLearning scheme: "
	717	+ getClassifier().getClass().getName() + "\n");
	718	text.append("\tScheme options: ");
	719	String[] classifierOptions = new String[0];
	720
	721	if (m_BaseClassifier instanceof OptionHandler) {
	722	classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();
	723
	724	for (int i = 0; i < classifierOptions.length; i++) {
	725	text.append(classifierOptions[i] + " ");
	726	}
	727	}
	728
	729	text.append("\n");
	730	switch (m_evaluationMeasure) {
	731	case EVAL_DEFAULT:
	732	case EVAL_ACCURACY:
	733	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
	734	text.append("\tSubset evaluation: RMSE\n");
	735	} else {
	736	text.append("\tSubset evaluation: classification error\n");
	737	}
	738	break;
	739	case EVAL_RMSE:
	740	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
	741	text.append("\tSubset evaluation: RMSE\n");
	742	} else {
	743	text.append("\tSubset evaluation: RMSE (probability estimates)\n");
	744	}
	745	break;
	746	case EVAL_MAE:
	747	if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
	748	text.append("\tSubset evaluation: MAE\n");
	749	} else {
	750	text.append("\tSubset evaluation: MAE (probability estimates)\n");
	751	}
	752	break;
	753	case EVAL_FMEASURE:
	754	text.append("\tSubset evaluation: F-measure\n");
	755	break;
	756	case EVAL_AUC:
	757	text.append("\tSubset evaluation: area under the ROC curve\n");
	758	break;
	759	}
	760
	761	text.append("\tNumber of folds for accuracy estimation: "
	762	+ m_folds
	763	+ "\n");
	764	}
	765
	766	return text.toString();
	767	}
	768
	769
	770	/**
	771	* decides whether to do another repeat of cross validation. If the
	772	* standard deviation of the cross validations
	773	* is greater than threshold% of the mean (default 1%) then another
	774	* repeat is done.
	775	*
	776	* @param repError an array of cross validation results
	777	* @param entries the number of cross validations done so far
	778	* @return true if another cv is to be done
	779	*/
	780	private boolean repeat (double[] repError, int entries) {
	781	int i;
	782	double mean = 0;
	783	double variance = 0;
	784
	785	if (entries == 1) {
	786	return true;
	787	}
	788
	789	for (i = 0; i < entries; i++) {
	790	mean += repError[i];
	791	}
	792
	793	mean /= (double)entries;
	794
	795	for (i = 0; i < entries; i++) {
	796	variance += ((repError[i] - mean)*(repError[i] - mean));
	797	}
	798
	799	variance /= (double)entries;
	800
	801	if (variance > 0) {
	802	variance = Math.sqrt(variance);
	803	}
	804
	805	if ((variance/mean) > m_threshold) {
	806	return true;
	807	}
	808
	809	return false;
	810	}
	811
	812	/**
	813	* Returns the revision string.
	814	*
	815	* @return the revision
	816	*/
	817	public String getRevision() {
	818	return RevisionUtils.extract("$Revision: 5928 $");
	819	}
	820
	821	/**
	822	* Main method for testing this class.
	823	*
	824	* @param args the options
	825	*/
	826	public static void main (String[] args) {
	827	runEvaluator(new WrapperSubsetEval(), args);
	828	}
	829	}
	830

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/attributeSelection/WrapperSubsetEval.java

Download in other formats: