Context Navigation

source: src/main/java/weka/attributeSelection/CheckAttributeSelection.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 55.9 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* CheckAttributeSelection.java
	19	* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.attributeSelection;
	24
	25	import weka.core.Attribute;
	26	import weka.core.CheckScheme;
	27	import weka.core.FastVector;
	28	import weka.core.Instances;
	29	import weka.core.MultiInstanceCapabilitiesHandler;
	30	import weka.core.Option;
	31	import weka.core.OptionHandler;
	32	import weka.core.RevisionUtils;
	33	import weka.core.SerializationHelper;
	34	import weka.core.SerializedObject;
	35	import weka.core.TestInstances;
	36	import weka.core.Utils;
	37	import weka.core.WeightedInstancesHandler;
	38
	39	import java.util.Enumeration;
	40	import java.util.Random;
	41	import java.util.Vector;
	42
	43	/**
	44	* Class for examining the capabilities and finding problems with
	45	* attribute selection schemes. If you implement an attribute selection using
	46	* the WEKA.libraries, you should run the checks on it to ensure robustness
	47	* and correct operation. Passing all the tests of this object does not mean
	48	* bugs in the attribute selection don't exist, but this will help find some
	49	* common ones. <p/>
	50	*
	51	* Typical usage: <p/>
	52	* <code>java weka.attributeSelection.CheckAttributeSelection -W ASscheme_name
	53	* -- ASscheme_options </code><p/>
	54	*
	55	* CheckAttributeSelection reports on the following:
	56	* <ul>
	57	* <li> Scheme abilities
	58	* <ul>
	59	* <li> Possible command line options to the scheme </li>
	60	* <li> Whether the scheme can predict nominal, numeric, string,
	61	* date or relational class attributes. </li>
	62	* <li> Whether the scheme can handle numeric predictor attributes </li>
	63	* <li> Whether the scheme can handle nominal predictor attributes </li>
	64	* <li> Whether the scheme can handle string predictor attributes </li>
	65	* <li> Whether the scheme can handle date predictor attributes </li>
	66	* <li> Whether the scheme can handle relational predictor attributes </li>
	67	* <li> Whether the scheme can handle multi-instance data </li>
	68	* <li> Whether the scheme can handle missing predictor values </li>
	69	* <li> Whether the scheme can handle missing class values </li>
	70	* <li> Whether a nominal scheme only handles 2 class problems </li>
	71	* <li> Whether the scheme can handle instance weights </li>
	72	* </ul>
	73	* </li>
	74	* <li> Correct functioning
	75	* <ul>
	76	* <li> Correct initialisation during search (i.e. no result
	77	* changes when search is performed repeatedly) </li>
	78	* <li> Whether the scheme alters the data passed to it
	79	* (number of instances, instance order, instance weights, etc) </li>
	80	* </ul>
	81	* </li>
	82	* <li> Degenerate cases
	83	* <ul>
	84	* <li> building scheme with zero instances </li>
	85	* <li> all but one predictor attribute values missing </li>
	86	* <li> all predictor attribute values missing </li>
	87	* <li> all but one class values missing </li>
	88	* <li> all class values missing </li>
	89	* </ul>
	90	* </li>
	91	* </ul>
	92	* Running CheckAttributeSelection with the debug option set will output the
	93	* training dataset for any failed tests.<p/>
	94	*
	95	* The <code>weka.attributeSelection.AbstractAttributeSelectionTest</code>
	96	* uses this class to test all the schemes. Any changes here, have to be
	97	* checked in that abstract test class, too. <p/>
	98	*
	99	<!-- options-start -->
	100	* Valid options are: <p/>
	101	*
	102	* <pre> -D
	103	* Turn on debugging output.</pre>
	104	*
	105	* <pre> -S
	106	* Silent mode - prints nothing to stdout.</pre>
	107	*
	108	* <pre> -N <num>
	109	* The number of instances in the datasets (default 20).</pre>
	110	*
	111	* <pre> -nominal <num>
	112	* The number of nominal attributes (default 2).</pre>
	113	*
	114	* <pre> -nominal-values <num>
	115	* The number of values for nominal attributes (default 1).</pre>
	116	*
	117	* <pre> -numeric <num>
	118	* The number of numeric attributes (default 1).</pre>
	119	*
	120	* <pre> -string <num>
	121	* The number of string attributes (default 1).</pre>
	122	*
	123	* <pre> -date <num>
	124	* The number of date attributes (default 1).</pre>
	125	*
	126	* <pre> -relational <num>
	127	* The number of relational attributes (default 1).</pre>
	128	*
	129	* <pre> -num-instances-relational <num>
	130	* The number of instances in relational/bag attributes (default 10).</pre>
	131	*
	132	* <pre> -words <comma-separated-list>
	133	* The words to use in string attributes.</pre>
	134	*
	135	* <pre> -word-separators <chars>
	136	* The word separators to use in string attributes.</pre>
	137	*
	138	* <pre> -eval name [options]
	139	* Full name and options of the evaluator analyzed.
	140	* eg: weka.attributeSelection.CfsSubsetEval</pre>
	141	*
	142	* <pre> -search name [options]
	143	* Full name and options of the search method analyzed.
	144	* eg: weka.attributeSelection.Ranker</pre>
	145	*
	146	* <pre> -test <eval|search>
	147	* The scheme to test, either the evaluator or the search method.
	148	* (Default: eval)</pre>
	149	*
	150	* <pre>
	151	* Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
	152	* </pre>
	153	*
	154	* <pre> -M
	155	* Treat missing values as a separate value.</pre>
	156	*
	157	* <pre> -L
	158	* Don't include locally predictive attributes.</pre>
	159	*
	160	* <pre>
	161	* Options specific to search method weka.attributeSelection.Ranker:
	162	* </pre>
	163	*
	164	* <pre> -P <start set>
	165	* Specify a starting set of attributes.
	166	* Eg. 1,3,5-7.
	167	* Any starting attributes specified are
	168	* ignored during the ranking.</pre>
	169	*
	170	* <pre> -T <threshold>
	171	* Specify a threshold by which attributes
	172	* may be discarded from the ranking.</pre>
	173	*
	174	* <pre> -N <num to select>
	175	* Specify number of attributes to select</pre>
	176	*
	177	<!-- options-end -->
	178	*
	179	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	180	* @author FracPete (fracpete at waikato dot ac dot nz)
	181	* @version $Revision: 4783 $
	182	* @see TestInstances
	183	*/
	184	public class CheckAttributeSelection
	185	extends CheckScheme {
	186
	187	/*
	188	* Note about test methods:
	189	* - methods return array of booleans
	190	* - first index: success or not
	191	* - second index: acceptable or not (e.g., Exception is OK)
	192	*
	193	* FracPete (fracpete at waikato dot ac dot nz)
	194	*/
	195
	196	/** The evaluator to be examined */
	197	protected ASEvaluation m_Evaluator = new CfsSubsetEval();
	198
	199	/** The search method to be used */
	200	protected ASSearch m_Search = new Ranker();
	201
	202	/** whether to test the evaluator (default) or the search method */
	203	protected boolean m_TestEvaluator = true;
	204
	205	/**
	206	* Returns an enumeration describing the available options.
	207	*
	208	* @return an enumeration of all the available options.
	209	*/
	210	public Enumeration listOptions() {
	211	Vector result = new Vector();
	212
	213	Enumeration en = super.listOptions();
	214	while (en.hasMoreElements())
	215	result.addElement(en.nextElement());
	216
	217	result.addElement(new Option(
	218	"\tFull name and options of the evaluator analyzed.\n"
	219	+"\teg: weka.attributeSelection.CfsSubsetEval",
	220	"eval", 1, "-eval name [options]"));
	221
	222	result.addElement(new Option(
	223	"\tFull name and options of the search method analyzed.\n"
	224	+"\teg: weka.attributeSelection.Ranker",
	225	"search", 1, "-search name [options]"));
	226
	227	result.addElement(new Option(
	228	"\tThe scheme to test, either the evaluator or the search method.\n"
	229	+"\t(Default: eval)",
	230	"test", 1, "-test <eval\|search>"));
	231
	232	if ((m_Evaluator != null) && (m_Evaluator instanceof OptionHandler)) {
	233	result.addElement(new Option("", "", 0,
	234	"\nOptions specific to evaluator "
	235	+ m_Evaluator.getClass().getName()
	236	+ ":"));
	237	Enumeration enm = ((OptionHandler) m_Evaluator).listOptions();
	238	while (enm.hasMoreElements())
	239	result.addElement(enm.nextElement());
	240	}
	241
	242	if ((m_Search != null) && (m_Search instanceof OptionHandler)) {
	243	result.addElement(new Option("", "", 0,
	244	"\nOptions specific to search method "
	245	+ m_Search.getClass().getName()
	246	+ ":"));
	247	Enumeration enm = ((OptionHandler) m_Search).listOptions();
	248	while (enm.hasMoreElements())
	249	result.addElement(enm.nextElement());
	250	}
	251
	252	return result.elements();
	253	}
	254
	255	/**
	256	* Parses a given list of options. <p/>
	257	*
	258	<!-- options-start -->
	259	* Valid options are: <p/>
	260	*
	261	* <pre> -D
	262	* Turn on debugging output.</pre>
	263	*
	264	* <pre> -S
	265	* Silent mode - prints nothing to stdout.</pre>
	266	*
	267	* <pre> -N <num>
	268	* The number of instances in the datasets (default 20).</pre>
	269	*
	270	* <pre> -nominal <num>
	271	* The number of nominal attributes (default 2).</pre>
	272	*
	273	* <pre> -nominal-values <num>
	274	* The number of values for nominal attributes (default 1).</pre>
	275	*
	276	* <pre> -numeric <num>
	277	* The number of numeric attributes (default 1).</pre>
	278	*
	279	* <pre> -string <num>
	280	* The number of string attributes (default 1).</pre>
	281	*
	282	* <pre> -date <num>
	283	* The number of date attributes (default 1).</pre>
	284	*
	285	* <pre> -relational <num>
	286	* The number of relational attributes (default 1).</pre>
	287	*
	288	* <pre> -num-instances-relational <num>
	289	* The number of instances in relational/bag attributes (default 10).</pre>
	290	*
	291	* <pre> -words <comma-separated-list>
	292	* The words to use in string attributes.</pre>
	293	*
	294	* <pre> -word-separators <chars>
	295	* The word separators to use in string attributes.</pre>
	296	*
	297	* <pre> -eval name [options]
	298	* Full name and options of the evaluator analyzed.
	299	* eg: weka.attributeSelection.CfsSubsetEval</pre>
	300	*
	301	* <pre> -search name [options]
	302	* Full name and options of the search method analyzed.
	303	* eg: weka.attributeSelection.Ranker</pre>
	304	*
	305	* <pre> -test <eval|search>
	306	* The scheme to test, either the evaluator or the search method.
	307	* (Default: eval)</pre>
	308	*
	309	* <pre>
	310	* Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
	311	* </pre>
	312	*
	313	* <pre> -M
	314	* Treat missing values as a separate value.</pre>
	315	*
	316	* <pre> -L
	317	* Don't include locally predictive attributes.</pre>
	318	*
	319	* <pre>
	320	* Options specific to search method weka.attributeSelection.Ranker:
	321	* </pre>
	322	*
	323	* <pre> -P <start set>
	324	* Specify a starting set of attributes.
	325	* Eg. 1,3,5-7.
	326	* Any starting attributes specified are
	327	* ignored during the ranking.</pre>
	328	*
	329	* <pre> -T <threshold>
	330	* Specify a threshold by which attributes
	331	* may be discarded from the ranking.</pre>
	332	*
	333	* <pre> -N <num to select>
	334	* Specify number of attributes to select</pre>
	335	*
	336	<!-- options-end -->
	337	*
	338	* @param options the list of options as an array of strings
	339	* @throws Exception if an option is not supported
	340	*/
	341	public void setOptions(String[] options) throws Exception {
	342	String tmpStr;
	343	String[] tmpOptions;
	344
	345	super.setOptions(options);
	346
	347	tmpStr = Utils.getOption("eval", options);
	348	tmpOptions = Utils.splitOptions(tmpStr);
	349	if (tmpOptions.length != 0) {
	350	tmpStr = tmpOptions[0];
	351	tmpOptions[0] = "";
	352	setEvaluator(
	353	(ASEvaluation) forName(
	354	"weka.attributeSelection",
	355	ASEvaluation.class,
	356	tmpStr,
	357	tmpOptions));
	358	}
	359
	360	tmpStr = Utils.getOption("search", options);
	361	tmpOptions = Utils.splitOptions(tmpStr);
	362	if (tmpOptions.length != 0) {
	363	tmpStr = tmpOptions[0];
	364	tmpOptions[0] = "";
	365	setSearch(
	366	(ASSearch) forName(
	367	"weka.attributeSelection",
	368	ASSearch.class,
	369	tmpStr,
	370	tmpOptions));
	371	}
	372
	373	tmpStr = Utils.getOption("test", options);
	374	setTestEvaluator(!tmpStr.equalsIgnoreCase("search"));
	375	}
	376
	377	/**
	378	* Gets the current settings of the CheckAttributeSelection.
	379	*
	380	* @return an array of strings suitable for passing to setOptions
	381	*/
	382	public String[] getOptions() {
	383	Vector result;
	384	String[] options;
	385	int i;
	386
	387	result = new Vector();
	388
	389	options = super.getOptions();
	390	for (i = 0; i < options.length; i++)
	391	result.add(options[i]);
	392
	393	result.add("-eval");
	394	if (getEvaluator() instanceof OptionHandler)
	395	result.add(
	396	getEvaluator().getClass().getName()
	397	+ " "
	398	+ Utils.joinOptions(((OptionHandler) getEvaluator()).getOptions()));
	399	else
	400	result.add(
	401	getEvaluator().getClass().getName());
	402
	403	result.add("-search");
	404	if (getSearch() instanceof OptionHandler)
	405	result.add(
	406	getSearch().getClass().getName()
	407	+ " "
	408	+ Utils.joinOptions(((OptionHandler) getSearch()).getOptions()));
	409	else
	410	result.add(
	411	getSearch().getClass().getName());
	412
	413	result.add("-test");
	414	if (getTestEvaluator())
	415	result.add("eval");
	416	else
	417	result.add("search");
	418
	419	return (String[]) result.toArray(new String[result.size()]);
	420	}
	421
	422	/**
	423	* Begin the tests, reporting results to System.out
	424	*/
	425	public void doTests() {
	426
	427	if (getTestObject() == null) {
	428	println("\n=== No scheme set ===");
	429	return;
	430	}
	431	println("\n=== Check on scheme: "
	432	+ getTestObject().getClass().getName()
	433	+ " ===\n");
	434
	435	// Start tests
	436	m_ClasspathProblems = false;
	437	println("--> Checking for interfaces");
	438	canTakeOptions();
	439	boolean weightedInstancesHandler = weightedInstancesHandler()[0];
	440	boolean multiInstanceHandler = multiInstanceHandler()[0];
	441	println("--> Scheme tests");
	442	declaresSerialVersionUID();
	443	testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler, multiInstanceHandler);
	444	testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler, multiInstanceHandler);
	445	testsPerClassType(Attribute.DATE, weightedInstancesHandler, multiInstanceHandler);
	446	testsPerClassType(Attribute.STRING, weightedInstancesHandler, multiInstanceHandler);
	447	testsPerClassType(Attribute.RELATIONAL, weightedInstancesHandler, multiInstanceHandler);
	448	}
	449
	450	/**
	451	* Set the evaluator to test.
	452	*
	453	* @param value the evaluator to use.
	454	*/
	455	public void setEvaluator(ASEvaluation value) {
	456	m_Evaluator = value;
	457	}
	458
	459	/**
	460	* Get the current evaluator
	461	*
	462	* @return the current evaluator
	463	*/
	464	public ASEvaluation getEvaluator() {
	465	return m_Evaluator;
	466	}
	467
	468	/**
	469	* Set the search method to test.
	470	*
	471	* @param value the search method to use.
	472	*/
	473	public void setSearch(ASSearch value) {
	474	m_Search = value;
	475	}
	476
	477	/**
	478	* Get the current search method
	479	*
	480	* @return the current search method
	481	*/
	482	public ASSearch getSearch() {
	483	return m_Search;
	484	}
	485
	486	/**
	487	* Sets whether the evaluator or the search method is being tested.
	488	*
	489	* @param value if true then the evaluator will be tested
	490	*/
	491	public void setTestEvaluator(boolean value) {
	492	m_TestEvaluator = value;
	493	}
	494
	495	/**
	496	* Gets whether the evaluator is being tested or the search method.
	497	*
	498	* @return true if the evaluator is being tested
	499	*/
	500	public boolean getTestEvaluator() {
	501	return m_TestEvaluator;
	502	}
	503
	504	/**
	505	* returns either the evaluator or the search method.
	506	*
	507	* @return the object to be tested
	508	* @see #m_TestEvaluator
	509	*/
	510	protected Object getTestObject() {
	511	if (getTestEvaluator())
	512	return getEvaluator();
	513	else
	514	return getSearch();
	515	}
	516
	517	/**
	518	* returns deep copies of the given object
	519	*
	520	* @param obj the object to copy
	521	* @param num the number of copies
	522	* @return the deep copies
	523	* @throws Exception if copying fails
	524	*/
	525	protected Object[] makeCopies(Object obj, int num) throws Exception {
	526	if (obj == null)
	527	throw new Exception("No object set");
	528
	529	Object[] objs = new Object[num];
	530	SerializedObject so = new SerializedObject(obj);
	531	for(int i = 0; i < objs.length; i++) {
	532	objs[i] = so.getObject();
	533	}
	534
	535	return objs;
	536	}
	537
	538	/**
	539	* Performs a attribute selection with the given search and evaluation scheme
	540	* on the provided data. The generated AttributeSelection object is returned.
	541	*
	542	* @param search the search scheme to use
	543	* @param eval the evaluator to use
	544	* @param data the data to work on
	545	* @return the used attribute selection object
	546	* @throws Exception if the attribute selection fails
	547	*/
	548	protected AttributeSelection search(ASSearch search, ASEvaluation eval,
	549	Instances data) throws Exception {
	550
	551	AttributeSelection result;
	552
	553	result = new AttributeSelection();
	554	result.setSeed(42);
	555	result.setSearch(search);
	556	result.setEvaluator(eval);
	557	result.SelectAttributes(data);
	558
	559	return result;
	560	}
	561
	562	/**
	563	* Run a battery of tests for a given class attribute type
	564	*
	565	* @param classType true if the class attribute should be numeric
	566	* @param weighted true if the scheme says it handles weights
	567	* @param multiInstance true if the scheme handles multi-instance data
	568	*/
	569	protected void testsPerClassType(int classType,
	570	boolean weighted,
	571	boolean multiInstance) {
	572
	573	boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0];
	574	boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0];
	575	boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0];
	576	boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0];
	577	boolean PRel;
	578	if (!multiInstance)
	579	PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0];
	580	else
	581	PRel = false;
	582
	583	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
	584	if (weighted)
	585	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	586
	587	if (classType == Attribute.NOMINAL)
	588	canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4);
	589
	590	if (!multiInstance) {
	591	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0);
	592	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1);
	593	}
	594
	595	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	596	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
	597	multiInstance, classType,
	598	true, false, 20)[0];
	599	if (handleMissingPredictors)
	600	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100);
	601
	602	boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
	603	multiInstance, classType,
	604	false, true, 20)[0];
	605	if (handleMissingClass)
	606	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100);
	607
	608	correctSearchInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	609	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType,
	610	handleMissingPredictors, handleMissingClass);
	611	}
	612	}
	613
	614	/**
	615	* Checks whether the scheme can take command line options.
	616	*
	617	* @return index 0 is true if the scheme can take options
	618	*/
	619	protected boolean[] canTakeOptions() {
	620
	621	boolean[] result = new boolean[2];
	622
	623	print("options...");
	624	if (getTestObject() instanceof OptionHandler) {
	625	println("yes");
	626	if (m_Debug) {
	627	println("\n=== Full report ===");
	628	Enumeration enu = ((OptionHandler) getTestObject()).listOptions();
	629	while (enu.hasMoreElements()) {
	630	Option option = (Option) enu.nextElement();
	631	print(option.synopsis() + "\n"
	632	+ option.description() + "\n");
	633	}
	634	println("\n");
	635	}
	636	result[0] = true;
	637	}
	638	else {
	639	println("no");
	640	result[0] = false;
	641	}
	642
	643	return result;
	644	}
	645
	646	/**
	647	* Checks whether the scheme says it can handle instance weights.
	648	*
	649	* @return true if the scheme handles instance weights
	650	*/
	651	protected boolean[] weightedInstancesHandler() {
	652
	653	boolean[] result = new boolean[2];
	654
	655	print("weighted instances scheme...");
	656	if (getTestObject() instanceof WeightedInstancesHandler) {
	657	println("yes");
	658	result[0] = true;
	659	}
	660	else {
	661	println("no");
	662	result[0] = false;
	663	}
	664
	665	return result;
	666	}
	667
	668	/**
	669	* Checks whether the scheme handles multi-instance data.
	670	*
	671	* @return true if the scheme handles multi-instance data
	672	*/
	673	protected boolean[] multiInstanceHandler() {
	674	boolean[] result = new boolean[2];
	675
	676	print("multi-instance scheme...");
	677	if (getTestObject() instanceof MultiInstanceCapabilitiesHandler) {
	678	println("yes");
	679	result[0] = true;
	680	}
	681	else {
	682	println("no");
	683	result[0] = false;
	684	}
	685
	686	return result;
	687	}
	688
	689	/**
	690	* tests for a serialVersionUID. Fails in case the schemes don't declare
	691	* a UID (both must!).
	692	*
	693	* @return index 0 is true if the scheme declares a UID
	694	*/
	695	protected boolean[] declaresSerialVersionUID() {
	696	boolean[] result = new boolean[2];
	697	boolean eval;
	698	boolean search;
	699
	700	print("serialVersionUID...");
	701
	702	eval = !SerializationHelper.needsUID(m_Evaluator.getClass());
	703	search = !SerializationHelper.needsUID(m_Search.getClass());
	704
	705	result[0] = eval && search;
	706
	707	if (result[0])
	708	println("yes");
	709	else
	710	println("no");
	711
	712	return result;
	713	}
	714
	715	/**
	716	* Checks basic prediction of the scheme, for simple non-troublesome
	717	* datasets.
	718	*
	719	* @param nominalPredictor if true use nominal predictor attributes
	720	* @param numericPredictor if true use numeric predictor attributes
	721	* @param stringPredictor if true use string predictor attributes
	722	* @param datePredictor if true use date predictor attributes
	723	* @param relationalPredictor if true use relational predictor attributes
	724	* @param multiInstance whether multi-instance is needed
	725	* @param classType the class type (NOMINAL, NUMERIC, etc.)
	726	* @return index 0 is true if the test was passed, index 1 is true if test
	727	* was acceptable
	728	*/
	729	protected boolean[] canPredict(
	730	boolean nominalPredictor,
	731	boolean numericPredictor,
	732	boolean stringPredictor,
	733	boolean datePredictor,
	734	boolean relationalPredictor,
	735	boolean multiInstance,
	736	int classType) {
	737
	738	print("basic predict");
	739	printAttributeSummary(
	740	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	741	print("...");
	742	FastVector accepts = new FastVector();
	743	accepts.addElement("unary");
	744	accepts.addElement("binary");
	745	accepts.addElement("nominal");
	746	accepts.addElement("numeric");
	747	accepts.addElement("string");
	748	accepts.addElement("date");
	749	accepts.addElement("relational");
	750	accepts.addElement("multi-instance");
	751	accepts.addElement("not in classpath");
	752	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
	753	boolean predictorMissing = false, classMissing = false;
	754
	755	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	756	datePredictor, relationalPredictor,
	757	multiInstance,
	758	classType,
	759	missingLevel, predictorMissing, classMissing,
	760	numTrain, numClasses,
	761	accepts);
	762	}
	763
	764	/**
	765	* Checks whether nominal schemes can handle more than two classes.
	766	* If a scheme is only designed for two-class problems it should
	767	* throw an appropriate exception for multi-class problems.
	768	*
	769	* @param nominalPredictor if true use nominal predictor attributes
	770	* @param numericPredictor if true use numeric predictor attributes
	771	* @param stringPredictor if true use string predictor attributes
	772	* @param datePredictor if true use date predictor attributes
	773	* @param relationalPredictor if true use relational predictor attributes
	774	* @param multiInstance whether multi-instance is needed
	775	* @param numClasses the number of classes to test
	776	* @return index 0 is true if the test was passed, index 1 is true if test
	777	* was acceptable
	778	*/
	779	protected boolean[] canHandleNClasses(
	780	boolean nominalPredictor,
	781	boolean numericPredictor,
	782	boolean stringPredictor,
	783	boolean datePredictor,
	784	boolean relationalPredictor,
	785	boolean multiInstance,
	786	int numClasses) {
	787
	788	print("more than two class problems");
	789	printAttributeSummary(
	790	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL);
	791	print("...");
	792	FastVector accepts = new FastVector();
	793	accepts.addElement("number");
	794	accepts.addElement("class");
	795	int numTrain = getNumInstances(), missingLevel = 0;
	796	boolean predictorMissing = false, classMissing = false;
	797
	798	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	799	datePredictor, relationalPredictor,
	800	multiInstance,
	801	Attribute.NOMINAL,
	802	missingLevel, predictorMissing, classMissing,
	803	numTrain, numClasses,
	804	accepts);
	805	}
	806
	807	/**
	808	* Checks whether the scheme can handle class attributes as Nth attribute.
	809	*
	810	* @param nominalPredictor if true use nominal predictor attributes
	811	* @param numericPredictor if true use numeric predictor attributes
	812	* @param stringPredictor if true use string predictor attributes
	813	* @param datePredictor if true use date predictor attributes
	814	* @param relationalPredictor if true use relational predictor attributes
	815	* @param multiInstance whether multi-instance is needed
	816	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	817	* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
	818	* @return index 0 is true if the test was passed, index 1 is true if test
	819	* was acceptable
	820	* @see TestInstances#CLASS_IS_LAST
	821	*/
	822	protected boolean[] canHandleClassAsNthAttribute(
	823	boolean nominalPredictor,
	824	boolean numericPredictor,
	825	boolean stringPredictor,
	826	boolean datePredictor,
	827	boolean relationalPredictor,
	828	boolean multiInstance,
	829	int classType,
	830	int classIndex) {
	831
	832	if (classIndex == TestInstances.CLASS_IS_LAST)
	833	print("class attribute as last attribute");
	834	else
	835	print("class attribute as " + (classIndex + 1) + ". attribute");
	836	printAttributeSummary(
	837	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	838	print("...");
	839	FastVector accepts = new FastVector();
	840	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
	841	boolean predictorMissing = false, classMissing = false;
	842
	843	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	844	datePredictor, relationalPredictor,
	845	multiInstance,
	846	classType,
	847	classIndex,
	848	missingLevel, predictorMissing, classMissing,
	849	numTrain, numClasses,
	850	accepts);
	851	}
	852
	853	/**
	854	* Checks whether the scheme can handle zero training instances.
	855	*
	856	* @param nominalPredictor if true use nominal predictor attributes
	857	* @param numericPredictor if true use numeric predictor attributes
	858	* @param stringPredictor if true use string predictor attributes
	859	* @param datePredictor if true use date predictor attributes
	860	* @param relationalPredictor if true use relational predictor attributes
	861	* @param multiInstance whether multi-instance is needed
	862	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	863	* @return index 0 is true if the test was passed, index 1 is true if test
	864	* was acceptable
	865	*/
	866	protected boolean[] canHandleZeroTraining(
	867	boolean nominalPredictor,
	868	boolean numericPredictor,
	869	boolean stringPredictor,
	870	boolean datePredictor,
	871	boolean relationalPredictor,
	872	boolean multiInstance,
	873	int classType) {
	874
	875	print("handle zero training instances");
	876	printAttributeSummary(
	877	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	878	print("...");
	879	FastVector accepts = new FastVector();
	880	accepts.addElement("train");
	881	accepts.addElement("value");
	882	int numTrain = 0, numClasses = 2, missingLevel = 0;
	883	boolean predictorMissing = false, classMissing = false;
	884
	885	return runBasicTest(
	886	nominalPredictor, numericPredictor, stringPredictor,
	887	datePredictor, relationalPredictor,
	888	multiInstance,
	889	classType,
	890	missingLevel, predictorMissing, classMissing,
	891	numTrain, numClasses,
	892	accepts);
	893	}
	894
	895	/**
	896	* Checks whether the scheme correctly initialises models when
	897	* ASSearch.search is called. This test calls search with
	898	* one training dataset. ASSearch is then called on a training set with
	899	* different structure, and then again with the original training set.
	900	* If the equals method of the ASEvaluation class returns false, this is
	901	* noted as incorrect search initialisation.
	902	*
	903	* @param nominalPredictor if true use nominal predictor attributes
	904	* @param numericPredictor if true use numeric predictor attributes
	905	* @param stringPredictor if true use string predictor attributes
	906	* @param datePredictor if true use date predictor attributes
	907	* @param relationalPredictor if true use relational predictor attributes
	908	* @param multiInstance whether multi-instance is needed
	909	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	910	* @return index 0 is true if the test was passed, index 1 is always false
	911	*/
	912	protected boolean[] correctSearchInitialisation(
	913	boolean nominalPredictor,
	914	boolean numericPredictor,
	915	boolean stringPredictor,
	916	boolean datePredictor,
	917	boolean relationalPredictor,
	918	boolean multiInstance,
	919	int classType) {
	920
	921	boolean[] result = new boolean[2];
	922	print("correct initialisation during search");
	923	printAttributeSummary(
	924	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	925	print("...");
	926	int numTrain = getNumInstances(),
	927	numClasses = 2, missingLevel = 0;
	928	boolean predictorMissing = false, classMissing = false;
	929
	930	Instances train1 = null;
	931	Instances train2 = null;
	932	ASSearch search = null;
	933	ASEvaluation evaluation1A = null;
	934	ASEvaluation evaluation1B = null;
	935	ASEvaluation evaluation2 = null;
	936	AttributeSelection attsel1A = null;
	937	AttributeSelection attsel1B = null;
	938	int stage = 0;
	939	try {
	940
	941	// Make two train sets with different numbers of attributes
	942	train1 = makeTestDataset(42, numTrain,
	943	nominalPredictor ? getNumNominal() : 0,
	944	numericPredictor ? getNumNumeric() : 0,
	945	stringPredictor ? getNumString() : 0,
	946	datePredictor ? getNumDate() : 0,
	947	relationalPredictor ? getNumRelational() : 0,
	948	numClasses,
	949	classType,
	950	multiInstance);
	951	train2 = makeTestDataset(84, numTrain,
	952	nominalPredictor ? getNumNominal() + 1 : 0,
	953	numericPredictor ? getNumNumeric() + 1 : 0,
	954	stringPredictor ? getNumString() : 0,
	955	datePredictor ? getNumDate() : 0,
	956	relationalPredictor ? getNumRelational() : 0,
	957	numClasses,
	958	classType,
	959	multiInstance);
	960	if (missingLevel > 0) {
	961	addMissing(train1, missingLevel, predictorMissing, classMissing);
	962	addMissing(train2, missingLevel, predictorMissing, classMissing);
	963	}
	964
	965	search = ASSearch.makeCopies(getSearch(), 1)[0];
	966	evaluation1A = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	967	evaluation1B = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	968	evaluation2 = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	969	} catch (Exception ex) {
	970	throw new Error("Error setting up for tests: " + ex.getMessage());
	971	}
	972	try {
	973	stage = 0;
	974	attsel1A = search(search, evaluation1A, train1);
	975
	976	stage = 1;
	977	search(search, evaluation2, train2);
	978
	979	stage = 2;
	980	attsel1B = search(search, evaluation1B, train1);
	981
	982	stage = 3;
	983	if (!attsel1A.toResultsString().equals(attsel1B.toResultsString())) {
	984	if (m_Debug) {
	985	println(
	986	"\n=== Full report ===\n"
	987	+ "\nFirst search\n"
	988	+ attsel1A.toResultsString()
	989	+ "\n\n");
	990	println(
	991	"\nSecond search\n"
	992	+ attsel1B.toResultsString()
	993	+ "\n\n");
	994	}
	995	throw new Exception("Results differ between search calls");
	996	}
	997	println("yes");
	998	result[0] = true;
	999
	1000	if (false && m_Debug) {
	1001	println(
	1002	"\n=== Full report ===\n"
	1003	+ "\nFirst search\n"
	1004	+ evaluation1A.toString()
	1005	+ "\n\n");
	1006	println(
	1007	"\nSecond search\n"
	1008	+ evaluation1B.toString()
	1009	+ "\n\n");
	1010	}
	1011	}
	1012	catch (Exception ex) {
	1013	println("no");
	1014	result[0] = false;
	1015	if (m_Debug) {
	1016	println("\n=== Full Report ===");
	1017	print("Problem during training");
	1018	switch (stage) {
	1019	case 0:
	1020	print(" of dataset 1");
	1021	break;
	1022	case 1:
	1023	print(" of dataset 2");
	1024	break;
	1025	case 2:
	1026	print(" of dataset 1 (2nd build)");
	1027	break;
	1028	case 3:
	1029	print(", comparing results from builds of dataset 1");
	1030	break;
	1031	}
	1032	println(": " + ex.getMessage() + "\n");
	1033	println("here are the datasets:\n");
	1034	println("=== Train1 Dataset ===\n"
	1035	+ train1.toString() + "\n");
	1036	println("=== Train2 Dataset ===\n"
	1037	+ train2.toString() + "\n");
	1038	}
	1039	}
	1040
	1041	return result;
	1042	}
	1043
	1044	/**
	1045	* Checks basic missing value handling of the scheme. If the missing
	1046	* values cause an exception to be thrown by the scheme, this will be
	1047	* recorded.
	1048	*
	1049	* @param nominalPredictor if true use nominal predictor attributes
	1050	* @param numericPredictor if true use numeric predictor attributes
	1051	* @param stringPredictor if true use string predictor attributes
	1052	* @param datePredictor if true use date predictor attributes
	1053	* @param relationalPredictor if true use relational predictor attributes
	1054	* @param multiInstance whether multi-instance is needed
	1055	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1056	* @param predictorMissing true if the missing values may be in
	1057	* the predictors
	1058	* @param classMissing true if the missing values may be in the class
	1059	* @param missingLevel the percentage of missing values
	1060	* @return index 0 is true if the test was passed, index 1 is true if test
	1061	* was acceptable
	1062	*/
	1063	protected boolean[] canHandleMissing(
	1064	boolean nominalPredictor,
	1065	boolean numericPredictor,
	1066	boolean stringPredictor,
	1067	boolean datePredictor,
	1068	boolean relationalPredictor,
	1069	boolean multiInstance,
	1070	int classType,
	1071	boolean predictorMissing,
	1072	boolean classMissing,
	1073	int missingLevel) {
	1074
	1075	if (missingLevel == 100)
	1076	print("100% ");
	1077	print("missing");
	1078	if (predictorMissing) {
	1079	print(" predictor");
	1080	if (classMissing)
	1081	print(" and");
	1082	}
	1083	if (classMissing)
	1084	print(" class");
	1085	print(" values");
	1086	printAttributeSummary(
	1087	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	1088	print("...");
	1089	FastVector accepts = new FastVector();
	1090	accepts.addElement("missing");
	1091	accepts.addElement("value");
	1092	accepts.addElement("train");
	1093	accepts.addElement("no attributes");
	1094	int numTrain = getNumInstances(), numClasses = 2;
	1095
	1096	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	1097	datePredictor, relationalPredictor,
	1098	multiInstance,
	1099	classType,
	1100	missingLevel, predictorMissing, classMissing,
	1101	numTrain, numClasses,
	1102	accepts);
	1103	}
	1104
	1105	/**
	1106	* Checks whether the scheme can handle instance weights.
	1107	* This test compares the scheme performance on two datasets
	1108	* that are identical except for the training weights. If the
	1109	* results change, then the scheme must be using the weights. It
	1110	* may be possible to get a false positive from this test if the
	1111	* weight changes aren't significant enough to induce a change
	1112	* in scheme performance (but the weights are chosen to minimize
	1113	* the likelihood of this).
	1114	*
	1115	* @param nominalPredictor if true use nominal predictor attributes
	1116	* @param numericPredictor if true use numeric predictor attributes
	1117	* @param stringPredictor if true use string predictor attributes
	1118	* @param datePredictor if true use date predictor attributes
	1119	* @param relationalPredictor if true use relational predictor attributes
	1120	* @param multiInstance whether multi-instance is needed
	1121	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1122	* @return index 0 true if the test was passed
	1123	*/
	1124	protected boolean[] instanceWeights(
	1125	boolean nominalPredictor,
	1126	boolean numericPredictor,
	1127	boolean stringPredictor,
	1128	boolean datePredictor,
	1129	boolean relationalPredictor,
	1130	boolean multiInstance,
	1131	int classType) {
	1132
	1133	print("scheme uses instance weights");
	1134	printAttributeSummary(
	1135	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	1136	print("...");
	1137	int numTrain = 2*getNumInstances(),
	1138	numClasses = 2, missingLevel = 0;
	1139	boolean predictorMissing = false, classMissing = false;
	1140
	1141	boolean[] result = new boolean[2];
	1142	Instances train = null;
	1143	ASSearch[] search = null;
	1144	ASEvaluation evaluationB = null;
	1145	ASEvaluation evaluationI = null;
	1146	AttributeSelection attselB = null;
	1147	AttributeSelection attselI = null;
	1148	boolean evalFail = false;
	1149	try {
	1150	train = makeTestDataset(42, numTrain,
	1151	nominalPredictor ? getNumNominal() + 1 : 0,
	1152	numericPredictor ? getNumNumeric() + 1 : 0,
	1153	stringPredictor ? getNumString() : 0,
	1154	datePredictor ? getNumDate() : 0,
	1155	relationalPredictor ? getNumRelational() : 0,
	1156	numClasses,
	1157	classType,
	1158	multiInstance);
	1159	if (missingLevel > 0)
	1160	addMissing(train, missingLevel, predictorMissing, classMissing);
	1161	search = ASSearch.makeCopies(getSearch(), 2);
	1162	evaluationB = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	1163	evaluationI = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	1164	attselB = search(search[0], evaluationB, train);
	1165	} catch (Exception ex) {
	1166	throw new Error("Error setting up for tests: " + ex.getMessage());
	1167	}
	1168	try {
	1169
	1170	// Now modify instance weights and re-built/test
	1171	for (int i = 0; i < train.numInstances(); i++) {
	1172	train.instance(i).setWeight(0);
	1173	}
	1174	Random random = new Random(1);
	1175	for (int i = 0; i < train.numInstances() / 2; i++) {
	1176	int inst = Math.abs(random.nextInt()) % train.numInstances();
	1177	int weight = Math.abs(random.nextInt()) % 10 + 1;
	1178	train.instance(inst).setWeight(weight);
	1179	}
	1180	attselI = search(search[1], evaluationI, train);
	1181	if (attselB.toResultsString().equals(attselI.toResultsString())) {
	1182	// println("no");
	1183	evalFail = true;
	1184	throw new Exception("evalFail");
	1185	}
	1186
	1187	println("yes");
	1188	result[0] = true;
	1189	} catch (Exception ex) {
	1190	println("no");
	1191	result[0] = false;
	1192
	1193	if (m_Debug) {
	1194	println("\n=== Full Report ===");
	1195
	1196	if (evalFail) {
	1197	println("Results don't differ between non-weighted and "
	1198	+ "weighted instance models.");
	1199	println("Here are the results:\n");
	1200	println("\nboth methods\n");
	1201	println(evaluationB.toString());
	1202	} else {
	1203	print("Problem during training");
	1204	println(": " + ex.getMessage() + "\n");
	1205	}
	1206	println("Here is the dataset:\n");
	1207	println("=== Train Dataset ===\n"
	1208	+ train.toString() + "\n");
	1209	println("=== Train Weights ===\n");
	1210	for (int i = 0; i < train.numInstances(); i++) {
	1211	println(" " + (i + 1)
	1212	+ " " + train.instance(i).weight());
	1213	}
	1214	}
	1215	}
	1216
	1217	return result;
	1218	}
	1219
	1220	/**
	1221	* Checks whether the scheme alters the training dataset during
	1222	* training. If the scheme needs to modify the training
	1223	* data it should take a copy of the training data. Currently checks
	1224	* for changes to header structure, number of instances, order of
	1225	* instances, instance weights.
	1226	*
	1227	* @param nominalPredictor if true use nominal predictor attributes
	1228	* @param numericPredictor if true use numeric predictor attributes
	1229	* @param stringPredictor if true use string predictor attributes
	1230	* @param datePredictor if true use date predictor attributes
	1231	* @param relationalPredictor if true use relational predictor attributes
	1232	* @param multiInstance whether multi-instance is needed
	1233	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1234	* @param predictorMissing true if we know the scheme can handle
	1235	* (at least) moderate missing predictor values
	1236	* @param classMissing true if we know the scheme can handle
	1237	* (at least) moderate missing class values
	1238	* @return index 0 is true if the test was passed
	1239	*/
	1240	protected boolean[] datasetIntegrity(
	1241	boolean nominalPredictor,
	1242	boolean numericPredictor,
	1243	boolean stringPredictor,
	1244	boolean datePredictor,
	1245	boolean relationalPredictor,
	1246	boolean multiInstance,
	1247	int classType,
	1248	boolean predictorMissing,
	1249	boolean classMissing) {
	1250
	1251	print("scheme doesn't alter original datasets");
	1252	printAttributeSummary(
	1253	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	1254	print("...");
	1255	int numTrain = getNumInstances(),
	1256	numClasses = 2, missingLevel = 20;
	1257
	1258	boolean[] result = new boolean[2];
	1259	Instances train = null;
	1260	Instances trainCopy = null;
	1261	ASSearch search = null;
	1262	ASEvaluation evaluation = null;
	1263	try {
	1264	train = makeTestDataset(42, numTrain,
	1265	nominalPredictor ? getNumNominal() : 0,
	1266	numericPredictor ? getNumNumeric() : 0,
	1267	stringPredictor ? getNumString() : 0,
	1268	datePredictor ? getNumDate() : 0,
	1269	relationalPredictor ? getNumRelational() : 0,
	1270	numClasses,
	1271	classType,
	1272	multiInstance);
	1273	if (missingLevel > 0)
	1274	addMissing(train, missingLevel, predictorMissing, classMissing);
	1275	search = ASSearch.makeCopies(getSearch(), 1)[0];
	1276	evaluation = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	1277	trainCopy = new Instances(train);
	1278	} catch (Exception ex) {
	1279	throw new Error("Error setting up for tests: " + ex.getMessage());
	1280	}
	1281	try {
	1282	search(search, evaluation, trainCopy);
	1283	compareDatasets(train, trainCopy);
	1284
	1285	println("yes");
	1286	result[0] = true;
	1287	} catch (Exception ex) {
	1288	println("no");
	1289	result[0] = false;
	1290
	1291	if (m_Debug) {
	1292	println("\n=== Full Report ===");
	1293	print("Problem during training");
	1294	println(": " + ex.getMessage() + "\n");
	1295	println("Here are the datasets:\n");
	1296	println("=== Train Dataset (original) ===\n"
	1297	+ trainCopy.toString() + "\n");
	1298	println("=== Train Dataset ===\n"
	1299	+ train.toString() + "\n");
	1300	}
	1301	}
	1302
	1303	return result;
	1304	}
	1305
	1306	/**
	1307	* Runs a text on the datasets with the given characteristics.
	1308	*
	1309	* @param nominalPredictor if true use nominal predictor attributes
	1310	* @param numericPredictor if true use numeric predictor attributes
	1311	* @param stringPredictor if true use string predictor attributes
	1312	* @param datePredictor if true use date predictor attributes
	1313	* @param relationalPredictor if true use relational predictor attributes
	1314	* @param multiInstance whether multi-instance is needed
	1315	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1316	* @param missingLevel the percentage of missing values
	1317	* @param predictorMissing true if the missing values may be in
	1318	* the predictors
	1319	* @param classMissing true if the missing values may be in the class
	1320	* @param numTrain the number of instances in the training set
	1321	* @param numClasses the number of classes
	1322	* @param accepts the acceptable string in an exception
	1323	* @return index 0 is true if the test was passed, index 1 is true if test
	1324	* was acceptable
	1325	*/
	1326	protected boolean[] runBasicTest(boolean nominalPredictor,
	1327	boolean numericPredictor,
	1328	boolean stringPredictor,
	1329	boolean datePredictor,
	1330	boolean relationalPredictor,
	1331	boolean multiInstance,
	1332	int classType,
	1333	int missingLevel,
	1334	boolean predictorMissing,
	1335	boolean classMissing,
	1336	int numTrain,
	1337	int numClasses,
	1338	FastVector accepts) {
	1339
	1340	return runBasicTest(
	1341	nominalPredictor,
	1342	numericPredictor,
	1343	stringPredictor,
	1344	datePredictor,
	1345	relationalPredictor,
	1346	multiInstance,
	1347	classType,
	1348	TestInstances.CLASS_IS_LAST,
	1349	missingLevel,
	1350	predictorMissing,
	1351	classMissing,
	1352	numTrain,
	1353	numClasses,
	1354	accepts);
	1355	}
	1356
	1357	/**
	1358	* Runs a text on the datasets with the given characteristics.
	1359	*
	1360	* @param nominalPredictor if true use nominal predictor attributes
	1361	* @param numericPredictor if true use numeric predictor attributes
	1362	* @param stringPredictor if true use string predictor attributes
	1363	* @param datePredictor if true use date predictor attributes
	1364	* @param relationalPredictor if true use relational predictor attributes
	1365	* @param multiInstance whether multi-instance is needed
	1366	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1367	* @param classIndex the attribute index of the class
	1368	* @param missingLevel the percentage of missing values
	1369	* @param predictorMissing true if the missing values may be in
	1370	* the predictors
	1371	* @param classMissing true if the missing values may be in the class
	1372	* @param numTrain the number of instances in the training set
	1373	* @param numClasses the number of classes
	1374	* @param accepts the acceptable string in an exception
	1375	* @return index 0 is true if the test was passed, index 1 is true if test
	1376	* was acceptable
	1377	*/
	1378	protected boolean[] runBasicTest(boolean nominalPredictor,
	1379	boolean numericPredictor,
	1380	boolean stringPredictor,
	1381	boolean datePredictor,
	1382	boolean relationalPredictor,
	1383	boolean multiInstance,
	1384	int classType,
	1385	int classIndex,
	1386	int missingLevel,
	1387	boolean predictorMissing,
	1388	boolean classMissing,
	1389	int numTrain,
	1390	int numClasses,
	1391	FastVector accepts) {
	1392
	1393	boolean[] result = new boolean[2];
	1394	Instances train = null;
	1395	ASSearch search = null;
	1396	ASEvaluation evaluation = null;
	1397	try {
	1398	train = makeTestDataset(42, numTrain,
	1399	nominalPredictor ? getNumNominal() : 0,
	1400	numericPredictor ? getNumNumeric() : 0,
	1401	stringPredictor ? getNumString() : 0,
	1402	datePredictor ? getNumDate() : 0,
	1403	relationalPredictor ? getNumRelational() : 0,
	1404	numClasses,
	1405	classType,
	1406	classIndex,
	1407	multiInstance);
	1408	if (missingLevel > 0)
	1409	addMissing(train, missingLevel, predictorMissing, classMissing);
	1410	search = ASSearch.makeCopies(getSearch(), 1)[0];
	1411	evaluation = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
	1412	} catch (Exception ex) {
	1413	ex.printStackTrace();
	1414	throw new Error("Error setting up for tests: " + ex.getMessage());
	1415	}
	1416	try {
	1417	search(search, evaluation, train);
	1418	println("yes");
	1419	result[0] = true;
	1420	}
	1421	catch (Exception ex) {
	1422	boolean acceptable = false;
	1423	String msg;
	1424	if (ex.getMessage() == null)
	1425	msg = "";
	1426	else
	1427	msg = ex.getMessage().toLowerCase();
	1428	if (msg.indexOf("not in classpath") > -1)
	1429	m_ClasspathProblems = true;
	1430	for (int i = 0; i < accepts.size(); i++) {
	1431	if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
	1432	acceptable = true;
	1433	}
	1434	}
	1435
	1436	println("no" + (acceptable ? " (OK error message)" : ""));
	1437	result[1] = acceptable;
	1438
	1439	if (m_Debug) {
	1440	println("\n=== Full Report ===");
	1441	print("Problem during training");
	1442	println(": " + ex.getMessage() + "\n");
	1443	if (!acceptable) {
	1444	if (accepts.size() > 0) {
	1445	print("Error message doesn't mention ");
	1446	for (int i = 0; i < accepts.size(); i++) {
	1447	if (i != 0) {
	1448	print(" or ");
	1449	}
	1450	print('"' + (String)accepts.elementAt(i) + '"');
	1451	}
	1452	}
	1453	println("here is the dataset:\n");
	1454	println("=== Train Dataset ===\n"
	1455	+ train.toString() + "\n");
	1456	}
	1457	}
	1458	}
	1459
	1460	return result;
	1461	}
	1462
	1463	/**
	1464	* Make a simple set of instances, which can later be modified
	1465	* for use in specific tests.
	1466	*
	1467	* @param seed the random number seed
	1468	* @param numInstances the number of instances to generate
	1469	* @param numNominal the number of nominal attributes
	1470	* @param numNumeric the number of numeric attributes
	1471	* @param numString the number of string attributes
	1472	* @param numDate the number of date attributes
	1473	* @param numRelational the number of relational attributes
	1474	* @param numClasses the number of classes (if nominal class)
	1475	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1476	* @param multiInstance whether the dataset should a multi-instance dataset
	1477	* @return the test dataset
	1478	* @throws Exception if the dataset couldn't be generated
	1479	* @see #process(Instances)
	1480	*/
	1481	protected Instances makeTestDataset(int seed, int numInstances,
	1482	int numNominal, int numNumeric,
	1483	int numString, int numDate,
	1484	int numRelational,
	1485	int numClasses, int classType,
	1486	boolean multiInstance)
	1487	throws Exception {
	1488
	1489	return makeTestDataset(
	1490	seed,
	1491	numInstances,
	1492	numNominal,
	1493	numNumeric,
	1494	numString,
	1495	numDate,
	1496	numRelational,
	1497	numClasses,
	1498	classType,
	1499	TestInstances.CLASS_IS_LAST,
	1500	multiInstance);
	1501	}
	1502
	1503	/**
	1504	* Make a simple set of instances with variable position of the class
	1505	* attribute, which can later be modified for use in specific tests.
	1506	*
	1507	* @param seed the random number seed
	1508	* @param numInstances the number of instances to generate
	1509	* @param numNominal the number of nominal attributes
	1510	* @param numNumeric the number of numeric attributes
	1511	* @param numString the number of string attributes
	1512	* @param numDate the number of date attributes
	1513	* @param numRelational the number of relational attributes
	1514	* @param numClasses the number of classes (if nominal class)
	1515	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1516	* @param classIndex the index of the class (0-based, -1 as last)
	1517	* @param multiInstance whether the dataset should a multi-instance dataset
	1518	* @return the test dataset
	1519	* @throws Exception if the dataset couldn't be generated
	1520	* @see TestInstances#CLASS_IS_LAST
	1521	* @see #process(Instances)
	1522	*/
	1523	protected Instances makeTestDataset(int seed, int numInstances,
	1524	int numNominal, int numNumeric,
	1525	int numString, int numDate,
	1526	int numRelational,
	1527	int numClasses, int classType,
	1528	int classIndex,
	1529	boolean multiInstance)
	1530	throws Exception {
	1531
	1532	TestInstances dataset = new TestInstances();
	1533
	1534	dataset.setSeed(seed);
	1535	dataset.setNumInstances(numInstances);
	1536	dataset.setNumNominal(numNominal);
	1537	dataset.setNumNumeric(numNumeric);
	1538	dataset.setNumString(numString);
	1539	dataset.setNumDate(numDate);
	1540	dataset.setNumRelational(numRelational);
	1541	dataset.setNumClasses(numClasses);
	1542	dataset.setClassType(classType);
	1543	dataset.setClassIndex(classIndex);
	1544	dataset.setNumClasses(numClasses);
	1545	dataset.setMultiInstance(multiInstance);
	1546	dataset.setWords(getWords());
	1547	dataset.setWordSeparators(getWordSeparators());
	1548
	1549	return process(dataset.generate());
	1550	}
	1551
	1552	/**
	1553	* Print out a short summary string for the dataset characteristics
	1554	*
	1555	* @param nominalPredictor true if nominal predictor attributes are present
	1556	* @param numericPredictor true if numeric predictor attributes are present
	1557	* @param stringPredictor true if string predictor attributes are present
	1558	* @param datePredictor true if date predictor attributes are present
	1559	* @param relationalPredictor true if relational predictor attributes are present
	1560	* @param multiInstance whether multi-instance is needed
	1561	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1562	*/
	1563	protected void printAttributeSummary(boolean nominalPredictor,
	1564	boolean numericPredictor,
	1565	boolean stringPredictor,
	1566	boolean datePredictor,
	1567	boolean relationalPredictor,
	1568	boolean multiInstance,
	1569	int classType) {
	1570
	1571	String str = "";
	1572
	1573	if (numericPredictor)
	1574	str += " numeric";
	1575
	1576	if (nominalPredictor) {
	1577	if (str.length() > 0)
	1578	str += " &";
	1579	str += " nominal";
	1580	}
	1581
	1582	if (stringPredictor) {
	1583	if (str.length() > 0)
	1584	str += " &";
	1585	str += " string";
	1586	}
	1587
	1588	if (datePredictor) {
	1589	if (str.length() > 0)
	1590	str += " &";
	1591	str += " date";
	1592	}
	1593
	1594	if (relationalPredictor) {
	1595	if (str.length() > 0)
	1596	str += " &";
	1597	str += " relational";
	1598	}
	1599
	1600	str += " predictors)";
	1601
	1602	switch (classType) {
	1603	case Attribute.NUMERIC:
	1604	str = " (numeric class," + str;
	1605	break;
	1606	case Attribute.NOMINAL:
	1607	str = " (nominal class," + str;
	1608	break;
	1609	case Attribute.STRING:
	1610	str = " (string class," + str;
	1611	break;
	1612	case Attribute.DATE:
	1613	str = " (date class," + str;
	1614	break;
	1615	case Attribute.RELATIONAL:
	1616	str = " (relational class," + str;
	1617	break;
	1618	}
	1619
	1620	print(str);
	1621	}
	1622
	1623	/**
	1624	* Returns the revision string.
	1625	*
	1626	* @return the revision
	1627	*/
	1628	public String getRevision() {
	1629	return RevisionUtils.extract("$Revision: 4783 $");
	1630	}
	1631
	1632	/**
	1633	* Test method for this class
	1634	*
	1635	* @param args the commandline parameters
	1636	*/
	1637	public static void main(String [] args) {
	1638	runCheck(new CheckAttributeSelection(), args);
	1639	}
	1640	}
	1641

Note: See TracBrowser for help on using the repository browser.

Download in other formats: