Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

CheckAttributeSelection.java @ 29

Last change on this file since 29 was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 55.9 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* CheckAttributeSelection.java
19	* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.attributeSelection;
24
25	import weka.core.Attribute;
26	import weka.core.CheckScheme;
27	import weka.core.FastVector;
28	import weka.core.Instances;
29	import weka.core.MultiInstanceCapabilitiesHandler;
30	import weka.core.Option;
31	import weka.core.OptionHandler;
32	import weka.core.RevisionUtils;
33	import weka.core.SerializationHelper;
34	import weka.core.SerializedObject;
35	import weka.core.TestInstances;
36	import weka.core.Utils;
37	import weka.core.WeightedInstancesHandler;
38
39	import java.util.Enumeration;
40	import java.util.Random;
41	import java.util.Vector;
42
43	/**
44	* Class for examining the capabilities and finding problems with
45	* attribute selection schemes. If you implement an attribute selection using
46	* the WEKA libraries, you should run the checks on it to ensure robustness
47	* and correct operation. Passing all the tests of this object does not mean
48	* bugs in the attribute selection don't exist, but this will help find some
49	* common ones. <p/>
50	*
51	* Typical usage: <p/>
52	* <code>java weka.attributeSelection.CheckAttributeSelection -W ASscheme_name
53	* -- ASscheme_options </code><p/>
54	*
55	* CheckAttributeSelection reports on the following:
56	* <ul>
57	* <li> Scheme abilities
58	* <ul>
59	* <li> Possible command line options to the scheme </li>
60	* <li> Whether the scheme can predict nominal, numeric, string,
61	* date or relational class attributes. </li>
62	* <li> Whether the scheme can handle numeric predictor attributes </li>
63	* <li> Whether the scheme can handle nominal predictor attributes </li>
64	* <li> Whether the scheme can handle string predictor attributes </li>
65	* <li> Whether the scheme can handle date predictor attributes </li>
66	* <li> Whether the scheme can handle relational predictor attributes </li>
67	* <li> Whether the scheme can handle multi-instance data </li>
68	* <li> Whether the scheme can handle missing predictor values </li>
69	* <li> Whether the scheme can handle missing class values </li>
70	* <li> Whether a nominal scheme only handles 2 class problems </li>
71	* <li> Whether the scheme can handle instance weights </li>
72	* </ul>
73	* </li>
74	* <li> Correct functioning
75	* <ul>
76	* <li> Correct initialisation during search (i.e. no result
77	* changes when search is performed repeatedly) </li>
78	* <li> Whether the scheme alters the data passed to it
79	* (number of instances, instance order, instance weights, etc) </li>
80	* </ul>
81	* </li>
82	* <li> Degenerate cases
83	* <ul>
84	* <li> building scheme with zero instances </li>
85	* <li> all but one predictor attribute values missing </li>
86	* <li> all predictor attribute values missing </li>
87	* <li> all but one class values missing </li>
88	* <li> all class values missing </li>
89	* </ul>
90	* </li>
91	* </ul>
92	* Running CheckAttributeSelection with the debug option set will output the
93	* training dataset for any failed tests.<p/>
94	*
95	* The <code>weka.attributeSelection.AbstractAttributeSelectionTest</code>
96	* uses this class to test all the schemes. Any changes here, have to be
97	* checked in that abstract test class, too. <p/>
98	*
99	<!-- options-start -->
100	* Valid options are: <p/>
101	*
102	* <pre> -D
103	* Turn on debugging output.</pre>
104	*
105	* <pre> -S
106	* Silent mode - prints nothing to stdout.</pre>
107	*
108	* <pre> -N <num>
109	* The number of instances in the datasets (default 20).</pre>
110	*
111	* <pre> -nominal <num>
112	* The number of nominal attributes (default 2).</pre>
113	*
114	* <pre> -nominal-values <num>
115	* The number of values for nominal attributes (default 1).</pre>
116	*
117	* <pre> -numeric <num>
118	* The number of numeric attributes (default 1).</pre>
119	*
120	* <pre> -string <num>
121	* The number of string attributes (default 1).</pre>
122	*
123	* <pre> -date <num>
124	* The number of date attributes (default 1).</pre>
125	*
126	* <pre> -relational <num>
127	* The number of relational attributes (default 1).</pre>
128	*
129	* <pre> -num-instances-relational <num>
130	* The number of instances in relational/bag attributes (default 10).</pre>
131	*
132	* <pre> -words <comma-separated-list>
133	* The words to use in string attributes.</pre>
134	*
135	* <pre> -word-separators <chars>
136	* The word separators to use in string attributes.</pre>
137	*
138	* <pre> -eval name [options]
139	* Full name and options of the evaluator analyzed.
140	* eg: weka.attributeSelection.CfsSubsetEval</pre>
141	*
142	* <pre> -search name [options]
143	* Full name and options of the search method analyzed.
144	* eg: weka.attributeSelection.Ranker</pre>
145	*
146	* <pre> -test <eval\|search>
147	* The scheme to test, either the evaluator or the search method.
148	* (Default: eval)</pre>
149	*
150	* <pre>
151	* Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
152	* </pre>
153	*
154	* <pre> -M
155	* Treat missing values as a seperate value.</pre>
156	*
157	* <pre> -L
158	* Don't include locally predictive attributes.</pre>
159	*
160	* <pre>
161	* Options specific to search method weka.attributeSelection.Ranker:
162	* </pre>
163	*
164	* <pre> -P <start set>
165	* Specify a starting set of attributes.
166	* Eg. 1,3,5-7.
167	* Any starting attributes specified are
168	* ignored during the ranking.</pre>
169	*
170	* <pre> -T <threshold>
171	* Specify a theshold by which attributes
172	* may be discarded from the ranking.</pre>
173	*
174	* <pre> -N <num to select>
175	* Specify number of attributes to select</pre>
176	*
177	<!-- options-end -->
178	*
179	* @author Len Trigg (trigg@cs.waikato.ac.nz)
180	* @author FracPete (fracpete at waikato dot ac dot nz)
181	* @version $Revision: 4783 $
182	* @see TestInstances
183	*/
184	public class CheckAttributeSelection
185	extends CheckScheme {
186
187	/*
188	* Note about test methods:
189	* - methods return array of booleans
190	* - first index: success or not
191	* - second index: acceptable or not (e.g., Exception is OK)
192	*
193	* FracPete (fracpete at waikato dot ac dot nz)
194	*/
195
196	/** The evaluator to be examined; initialised to a CfsSubsetEval instance. */
197	protected ASEvaluation m_Evaluator = new CfsSubsetEval();
198
199	/** The search method to be used; initialised to a Ranker instance. */
200	protected ASSearch m_Search = new Ranker();
201
202	/** Whether to test the evaluator (true, the default) or the search method (false). */
203	protected boolean m_TestEvaluator = true;
204
205	/**
206	* Returns an enumeration describing the available options.
207	*
208	* @return an enumeration of all the available options.
209	*/
210	public Enumeration listOptions() {
211	Vector result = new Vector();
212
213	Enumeration en = super.listOptions();
214	while (en.hasMoreElements())
215	result.addElement(en.nextElement());
216
217	result.addElement(new Option(
218	"\tFull name and options of the evaluator analyzed.\n"
219	+"\teg: weka.attributeSelection.CfsSubsetEval",
220	"eval", 1, "-eval name [options]"));
221
222	result.addElement(new Option(
223	"\tFull name and options of the search method analyzed.\n"
224	+"\teg: weka.attributeSelection.Ranker",
225	"search", 1, "-search name [options]"));
226
227	result.addElement(new Option(
228	"\tThe scheme to test, either the evaluator or the search method.\n"
229	+"\t(Default: eval)",
230	"test", 1, "-test <eval\|search>"));
231
232	if ((m_Evaluator != null) && (m_Evaluator instanceof OptionHandler)) {
233	result.addElement(new Option("", "", 0,
234	"\nOptions specific to evaluator "
235	+ m_Evaluator.getClass().getName()
236	+ ":"));
237	Enumeration enm = ((OptionHandler) m_Evaluator).listOptions();
238	while (enm.hasMoreElements())
239	result.addElement(enm.nextElement());
240	}
241
242	if ((m_Search != null) && (m_Search instanceof OptionHandler)) {
243	result.addElement(new Option("", "", 0,
244	"\nOptions specific to search method "
245	+ m_Search.getClass().getName()
246	+ ":"));
247	Enumeration enm = ((OptionHandler) m_Search).listOptions();
248	while (enm.hasMoreElements())
249	result.addElement(enm.nextElement());
250	}
251
252	return result.elements();
253	}
254
255	/**
256	* Parses a given list of options. <p/>
257	*
258	<!-- options-start -->
259	* Valid options are: <p/>
260	*
261	* <pre> -D
262	* Turn on debugging output.</pre>
263	*
264	* <pre> -S
265	* Silent mode - prints nothing to stdout.</pre>
266	*
267	* <pre> -N <num>
268	* The number of instances in the datasets (default 20).</pre>
269	*
270	* <pre> -nominal <num>
271	* The number of nominal attributes (default 2).</pre>
272	*
273	* <pre> -nominal-values <num>
274	* The number of values for nominal attributes (default 1).</pre>
275	*
276	* <pre> -numeric <num>
277	* The number of numeric attributes (default 1).</pre>
278	*
279	* <pre> -string <num>
280	* The number of string attributes (default 1).</pre>
281	*
282	* <pre> -date <num>
283	* The number of date attributes (default 1).</pre>
284	*
285	* <pre> -relational <num>
286	* The number of relational attributes (default 1).</pre>
287	*
288	* <pre> -num-instances-relational <num>
289	* The number of instances in relational/bag attributes (default 10).</pre>
290	*
291	* <pre> -words <comma-separated-list>
292	* The words to use in string attributes.</pre>
293	*
294	* <pre> -word-separators <chars>
295	* The word separators to use in string attributes.</pre>
296	*
297	* <pre> -eval name [options]
298	* Full name and options of the evaluator analyzed.
299	* eg: weka.attributeSelection.CfsSubsetEval</pre>
300	*
301	* <pre> -search name [options]
302	* Full name and options of the search method analyzed.
303	* eg: weka.attributeSelection.Ranker</pre>
304	*
305	* <pre> -test <eval\|search>
306	* The scheme to test, either the evaluator or the search method.
307	* (Default: eval)</pre>
308	*
309	* <pre>
310	* Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
311	* </pre>
312	*
313	* <pre> -M
314	* Treat missing values as a seperate value.</pre>
315	*
316	* <pre> -L
317	* Don't include locally predictive attributes.</pre>
318	*
319	* <pre>
320	* Options specific to search method weka.attributeSelection.Ranker:
321	* </pre>
322	*
323	* <pre> -P <start set>
324	* Specify a starting set of attributes.
325	* Eg. 1,3,5-7.
326	* Any starting attributes specified are
327	* ignored during the ranking.</pre>
328	*
329	* <pre> -T <threshold>
330	* Specify a theshold by which attributes
331	* may be discarded from the ranking.</pre>
332	*
333	* <pre> -N <num to select>
334	* Specify number of attributes to select</pre>
335	*
336	<!-- options-end -->
337	*
338	* @param options the list of options as an array of strings
339	* @throws Exception if an option is not supported
340	*/
341	public void setOptions(String[] options) throws Exception {
342	String tmpStr;
343	String[] tmpOptions;
344
345	super.setOptions(options);
346
347	tmpStr = Utils.getOption("eval", options);
348	tmpOptions = Utils.splitOptions(tmpStr);
349	if (tmpOptions.length != 0) {
350	tmpStr = tmpOptions[0];
351	tmpOptions[0] = "";
352	setEvaluator(
353	(ASEvaluation) forName(
354	"weka.attributeSelection",
355	ASEvaluation.class,
356	tmpStr,
357	tmpOptions));
358	}
359
360	tmpStr = Utils.getOption("search", options);
361	tmpOptions = Utils.splitOptions(tmpStr);
362	if (tmpOptions.length != 0) {
363	tmpStr = tmpOptions[0];
364	tmpOptions[0] = "";
365	setSearch(
366	(ASSearch) forName(
367	"weka.attributeSelection",
368	ASSearch.class,
369	tmpStr,
370	tmpOptions));
371	}
372
373	tmpStr = Utils.getOption("test", options);
374	setTestEvaluator(!tmpStr.equalsIgnoreCase("search"));
375	}
376
377	/**
378	* Gets the current settings of the CheckAttributeSelection.
379	*
380	* @return an array of strings suitable for passing to setOptions
381	*/
382	public String[] getOptions() {
383	Vector result;
384	String[] options;
385	int i;
386
387	result = new Vector();
388
389	options = super.getOptions();
390	for (i = 0; i < options.length; i++)
391	result.add(options[i]);
392
393	result.add("-eval");
394	if (getEvaluator() instanceof OptionHandler)
395	result.add(
396	getEvaluator().getClass().getName()
397	+ " "
398	+ Utils.joinOptions(((OptionHandler) getEvaluator()).getOptions()));
399	else
400	result.add(
401	getEvaluator().getClass().getName());
402
403	result.add("-search");
404	if (getSearch() instanceof OptionHandler)
405	result.add(
406	getSearch().getClass().getName()
407	+ " "
408	+ Utils.joinOptions(((OptionHandler) getSearch()).getOptions()));
409	else
410	result.add(
411	getSearch().getClass().getName());
412
413	result.add("-test");
414	if (getTestEvaluator())
415	result.add("eval");
416	else
417	result.add("search");
418
419	return (String[]) result.toArray(new String[result.size()]);
420	}
421
422	/**
423	* Begin the tests, reporting results to System.out
424	*/
425	public void doTests() {
426
427	if (getTestObject() == null) {
428	println("\n=== No scheme set ===");
429	return;
430	}
431	println("\n=== Check on scheme: "
432	+ getTestObject().getClass().getName()
433	+ " ===\n");
434
435	// Start tests
436	m_ClasspathProblems = false;
437	println("--> Checking for interfaces");
438	canTakeOptions();
439	boolean weightedInstancesHandler = weightedInstancesHandler()[0];
440	boolean multiInstanceHandler = multiInstanceHandler()[0];
441	println("--> Scheme tests");
442	declaresSerialVersionUID();
443	testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler, multiInstanceHandler);
444	testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler, multiInstanceHandler);
445	testsPerClassType(Attribute.DATE, weightedInstancesHandler, multiInstanceHandler);
446	testsPerClassType(Attribute.STRING, weightedInstancesHandler, multiInstanceHandler);
447	testsPerClassType(Attribute.RELATIONAL, weightedInstancesHandler, multiInstanceHandler);
448	}
449
450	/**
451	* Set the evaluator to test.
452	*
453	* @param value the evaluator to use.
454	*/
455	public void setEvaluator(ASEvaluation value) {
456	m_Evaluator = value;
457	}
458
459	/**
460	* Get the current evaluator
461	*
462	* @return the current evaluator
463	*/
464	public ASEvaluation getEvaluator() {
465	return m_Evaluator;
466	}
467
468	/**
469	* Set the search method to test.
470	*
471	* @param value the search method to use.
472	*/
473	public void setSearch(ASSearch value) {
474	m_Search = value;
475	}
476
477	/**
478	* Get the current search method
479	*
480	* @return the current search method
481	*/
482	public ASSearch getSearch() {
483	return m_Search;
484	}
485
486	/**
487	* Sets whether the evaluator or the search method is being tested.
488	*
489	* @param value if true then the evaluator will be tested
490	*/
491	public void setTestEvaluator(boolean value) {
492	m_TestEvaluator = value;
493	}
494
495	/**
496	* Gets whether the evaluator is being tested or the search method.
497	*
498	* @return true if the evaluator is being tested
499	*/
500	public boolean getTestEvaluator() {
501	return m_TestEvaluator;
502	}
503
504	/**
505	* returns either the evaluator or the search method.
506	*
507	* @return the object to be tested
508	* @see #m_TestEvaluator
509	*/
510	protected Object getTestObject() {
511	if (getTestEvaluator())
512	return getEvaluator();
513	else
514	return getSearch();
515	}
516
517	/**
518	* returns deep copies of the given object
519	*
520	* @param obj the object to copy
521	* @param num the number of copies
522	* @return the deep copies
523	* @throws Exception if copying fails
524	*/
525	protected Object[] makeCopies(Object obj, int num) throws Exception {
526	if (obj == null)
527	throw new Exception("No object set");
528
529	Object[] objs = new Object[num];
530	SerializedObject so = new SerializedObject(obj);
531	for(int i = 0; i < objs.length; i++) {
532	objs[i] = so.getObject();
533	}
534
535	return objs;
536	}
537
538	/**
539	* Performs a attribute selection with the given search and evaluation scheme
540	* on the provided data. The generated AttributeSelection object is returned.
541	*
542	* @param search the search scheme to use
543	* @param eval the evaluator to use
544	* @param data the data to work on
545	* @return the used attribute selection object
546	* @throws Exception if the attribute selection fails
547	*/
548	protected AttributeSelection search(ASSearch search, ASEvaluation eval,
549	Instances data) throws Exception {
550
551	AttributeSelection result;
552
553	result = new AttributeSelection();
554	result.setSeed(42);
555	result.setSearch(search);
556	result.setEvaluator(eval);
557	result.SelectAttributes(data);
558
559	return result;
560	}
561
562	/**
563	* Run a battery of tests for a given class attribute type
564	*
565	* @param classType true if the class attribute should be numeric
566	* @param weighted true if the scheme says it handles weights
567	* @param multiInstance true if the scheme handles multi-instance data
568	*/
569	protected void testsPerClassType(int classType,
570	boolean weighted,
571	boolean multiInstance) {
572
573	boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0];
574	boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0];
575	boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0];
576	boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0];
577	boolean PRel;
578	if (!multiInstance)
579	PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0];
580	else
581	PRel = false;
582
583	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
584	if (weighted)
585	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
586
587	if (classType == Attribute.NOMINAL)
588	canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4);
589
590	if (!multiInstance) {
591	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0);
592	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1);
593	}
594
595	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
596	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
597	multiInstance, classType,
598	true, false, 20)[0];
599	if (handleMissingPredictors)
600	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100);
601
602	boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
603	multiInstance, classType,
604	false, true, 20)[0];
605	if (handleMissingClass)
606	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100);
607
608	correctSearchInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
609	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType,
610	handleMissingPredictors, handleMissingClass);
611	}
612	}
613
614	/**
615	* Checks whether the scheme can take command line options.
616	*
617	* @return index 0 is true if the scheme can take options
618	*/
619	protected boolean[] canTakeOptions() {
620
621	boolean[] result = new boolean[2];
622
623	print("options...");
624	if (getTestObject() instanceof OptionHandler) {
625	println("yes");
626	if (m_Debug) {
627	println("\n=== Full report ===");
628	Enumeration enu = ((OptionHandler) getTestObject()).listOptions();
629	while (enu.hasMoreElements()) {
630	Option option = (Option) enu.nextElement();
631	print(option.synopsis() + "\n"
632	+ option.description() + "\n");
633	}
634	println("\n");
635	}
636	result[0] = true;
637	}
638	else {
639	println("no");
640	result[0] = false;
641	}
642
643	return result;
644	}
645
646	/**
647	* Checks whether the scheme says it can handle instance weights.
648	*
649	* @return true if the scheme handles instance weights
650	*/
651	protected boolean[] weightedInstancesHandler() {
652
653	boolean[] result = new boolean[2];
654
655	print("weighted instances scheme...");
656	if (getTestObject() instanceof WeightedInstancesHandler) {
657	println("yes");
658	result[0] = true;
659	}
660	else {
661	println("no");
662	result[0] = false;
663	}
664
665	return result;
666	}
667
668	/**
669	* Checks whether the scheme handles multi-instance data.
670	*
671	* @return true if the scheme handles multi-instance data
672	*/
673	protected boolean[] multiInstanceHandler() {
674	boolean[] result = new boolean[2];
675
676	print("multi-instance scheme...");
677	if (getTestObject() instanceof MultiInstanceCapabilitiesHandler) {
678	println("yes");
679	result[0] = true;
680	}
681	else {
682	println("no");
683	result[0] = false;
684	}
685
686	return result;
687	}
688
689	/**
690	* tests for a serialVersionUID. Fails in case the schemes don't declare
691	* a UID (both must!).
692	*
693	* @return index 0 is true if the scheme declares a UID
694	*/
695	protected boolean[] declaresSerialVersionUID() {
696	boolean[] result = new boolean[2];
697	boolean eval;
698	boolean search;
699
700	print("serialVersionUID...");
701
702	eval = !SerializationHelper.needsUID(m_Evaluator.getClass());
703	search = !SerializationHelper.needsUID(m_Search.getClass());
704
705	result[0] = eval && search;
706
707	if (result[0])
708	println("yes");
709	else
710	println("no");
711
712	return result;
713	}
714
715	/**
716	* Checks basic prediction of the scheme, for simple non-troublesome
717	* datasets.
718	*
719	* @param nominalPredictor if true use nominal predictor attributes
720	* @param numericPredictor if true use numeric predictor attributes
721	* @param stringPredictor if true use string predictor attributes
722	* @param datePredictor if true use date predictor attributes
723	* @param relationalPredictor if true use relational predictor attributes
724	* @param multiInstance whether multi-instance is needed
725	* @param classType the class type (NOMINAL, NUMERIC, etc.)
726	* @return index 0 is true if the test was passed, index 1 is true if test
727	* was acceptable
728	*/
729	protected boolean[] canPredict(
730	boolean nominalPredictor,
731	boolean numericPredictor,
732	boolean stringPredictor,
733	boolean datePredictor,
734	boolean relationalPredictor,
735	boolean multiInstance,
736	int classType) {
737
738	print("basic predict");
739	printAttributeSummary(
740	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
741	print("...");
742	FastVector accepts = new FastVector();
743	accepts.addElement("unary");
744	accepts.addElement("binary");
745	accepts.addElement("nominal");
746	accepts.addElement("numeric");
747	accepts.addElement("string");
748	accepts.addElement("date");
749	accepts.addElement("relational");
750	accepts.addElement("multi-instance");
751	accepts.addElement("not in classpath");
752	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
753	boolean predictorMissing = false, classMissing = false;
754
755	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
756	datePredictor, relationalPredictor,
757	multiInstance,
758	classType,
759	missingLevel, predictorMissing, classMissing,
760	numTrain, numClasses,
761	accepts);
762	}
763
764	/**
765	* Checks whether nominal schemes can handle more than two classes.
766	* If a scheme is only designed for two-class problems it should
767	* throw an appropriate exception for multi-class problems.
768	*
769	* @param nominalPredictor if true use nominal predictor attributes
770	* @param numericPredictor if true use numeric predictor attributes
771	* @param stringPredictor if true use string predictor attributes
772	* @param datePredictor if true use date predictor attributes
773	* @param relationalPredictor if true use relational predictor attributes
774	* @param multiInstance whether multi-instance is needed
775	* @param numClasses the number of classes to test
776	* @return index 0 is true if the test was passed, index 1 is true if test
777	* was acceptable
778	*/
779	protected boolean[] canHandleNClasses(
780	boolean nominalPredictor,
781	boolean numericPredictor,
782	boolean stringPredictor,
783	boolean datePredictor,
784	boolean relationalPredictor,
785	boolean multiInstance,
786	int numClasses) {
787
788	print("more than two class problems");
789	printAttributeSummary(
790	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL);
791	print("...");
792	FastVector accepts = new FastVector();
793	accepts.addElement("number");
794	accepts.addElement("class");
795	int numTrain = getNumInstances(), missingLevel = 0;
796	boolean predictorMissing = false, classMissing = false;
797
798	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
799	datePredictor, relationalPredictor,
800	multiInstance,
801	Attribute.NOMINAL,
802	missingLevel, predictorMissing, classMissing,
803	numTrain, numClasses,
804	accepts);
805	}
806
807	/**
808	* Checks whether the scheme can handle class attributes as Nth attribute.
809	*
810	* @param nominalPredictor if true use nominal predictor attributes
811	* @param numericPredictor if true use numeric predictor attributes
812	* @param stringPredictor if true use string predictor attributes
813	* @param datePredictor if true use date predictor attributes
814	* @param relationalPredictor if true use relational predictor attributes
815	* @param multiInstance whether multi-instance is needed
816	* @param classType the class type (NUMERIC, NOMINAL, etc.)
817	* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
818	* @return index 0 is true if the test was passed, index 1 is true if test
819	* was acceptable
820	* @see TestInstances#CLASS_IS_LAST
821	*/
822	protected boolean[] canHandleClassAsNthAttribute(
823	boolean nominalPredictor,
824	boolean numericPredictor,
825	boolean stringPredictor,
826	boolean datePredictor,
827	boolean relationalPredictor,
828	boolean multiInstance,
829	int classType,
830	int classIndex) {
831
832	if (classIndex == TestInstances.CLASS_IS_LAST)
833	print("class attribute as last attribute");
834	else
835	print("class attribute as " + (classIndex + 1) + ". attribute");
836	printAttributeSummary(
837	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
838	print("...");
839	FastVector accepts = new FastVector();
840	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
841	boolean predictorMissing = false, classMissing = false;
842
843	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
844	datePredictor, relationalPredictor,
845	multiInstance,
846	classType,
847	classIndex,
848	missingLevel, predictorMissing, classMissing,
849	numTrain, numClasses,
850	accepts);
851	}
852
853	/**
854	* Checks whether the scheme can handle zero training instances.
855	*
856	* @param nominalPredictor if true use nominal predictor attributes
857	* @param numericPredictor if true use numeric predictor attributes
858	* @param stringPredictor if true use string predictor attributes
859	* @param datePredictor if true use date predictor attributes
860	* @param relationalPredictor if true use relational predictor attributes
861	* @param multiInstance whether multi-instance is needed
862	* @param classType the class type (NUMERIC, NOMINAL, etc.)
863	* @return index 0 is true if the test was passed, index 1 is true if test
864	* was acceptable
865	*/
866	protected boolean[] canHandleZeroTraining(
867	boolean nominalPredictor,
868	boolean numericPredictor,
869	boolean stringPredictor,
870	boolean datePredictor,
871	boolean relationalPredictor,
872	boolean multiInstance,
873	int classType) {
874
875	print("handle zero training instances");
876	printAttributeSummary(
877	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
878	print("...");
879	FastVector accepts = new FastVector();
880	accepts.addElement("train");
881	accepts.addElement("value");
882	int numTrain = 0, numClasses = 2, missingLevel = 0;
883	boolean predictorMissing = false, classMissing = false;
884
885	return runBasicTest(
886	nominalPredictor, numericPredictor, stringPredictor,
887	datePredictor, relationalPredictor,
888	multiInstance,
889	classType,
890	missingLevel, predictorMissing, classMissing,
891	numTrain, numClasses,
892	accepts);
893	}
894
895	/**
896	* Checks whether the scheme correctly initialises models when
897	* ASSearch.search is called. This test calls search with
898	* one training dataset. ASSearch is then called on a training set with
899	* different structure, and then again with the original training set.
900	* If the results strings of the two attribute selections performed on the
901	* original training set differ, this is noted as incorrect search initialisation.
902	*
903	* @param nominalPredictor if true use nominal predictor attributes
904	* @param numericPredictor if true use numeric predictor attributes
905	* @param stringPredictor if true use string predictor attributes
906	* @param datePredictor if true use date predictor attributes
907	* @param relationalPredictor if true use relational predictor attributes
908	* @param multiInstance whether multi-instance is needed
909	* @param classType the class type (NUMERIC, NOMINAL, etc.)
910	* @return index 0 is true if the test was passed, index 1 is always false
911	*/
912	protected boolean[] correctSearchInitialisation(
913	boolean nominalPredictor,
914	boolean numericPredictor,
915	boolean stringPredictor,
916	boolean datePredictor,
917	boolean relationalPredictor,
918	boolean multiInstance,
919	int classType) {
920
921	boolean[] result = new boolean[2];
922	print("correct initialisation during search");
923	printAttributeSummary(
924	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
925	print("...");
926	int numTrain = getNumInstances(),
927	numClasses = 2, missingLevel = 0;
928	boolean predictorMissing = false, classMissing = false;
929
930	Instances train1 = null;
931	Instances train2 = null;
932	ASSearch search = null;
933	ASEvaluation evaluation1A = null;
934	ASEvaluation evaluation1B = null;
935	ASEvaluation evaluation2 = null;
936	AttributeSelection attsel1A = null;
937	AttributeSelection attsel1B = null;
938	int stage = 0;
939	try {
940
941	// Make two train sets with different numbers of attributes
942	train1 = makeTestDataset(42, numTrain,
943	nominalPredictor ? getNumNominal() : 0,
944	numericPredictor ? getNumNumeric() : 0,
945	stringPredictor ? getNumString() : 0,
946	datePredictor ? getNumDate() : 0,
947	relationalPredictor ? getNumRelational() : 0,
948	numClasses,
949	classType,
950	multiInstance);
951	train2 = makeTestDataset(84, numTrain,
952	nominalPredictor ? getNumNominal() + 1 : 0,
953	numericPredictor ? getNumNumeric() + 1 : 0,
954	stringPredictor ? getNumString() : 0,
955	datePredictor ? getNumDate() : 0,
956	relationalPredictor ? getNumRelational() : 0,
957	numClasses,
958	classType,
959	multiInstance);
960	if (missingLevel > 0) {
961	addMissing(train1, missingLevel, predictorMissing, classMissing);
962	addMissing(train2, missingLevel, predictorMissing, classMissing);
963	}
964
965	search = ASSearch.makeCopies(getSearch(), 1)[0];
966	evaluation1A = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
967	evaluation1B = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
968	evaluation2 = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
969	} catch (Exception ex) {
970	throw new Error("Error setting up for tests: " + ex.getMessage());
971	}
972	try {
973	stage = 0;
974	attsel1A = search(search, evaluation1A, train1);
975
976	stage = 1;
977	search(search, evaluation2, train2);
978
979	stage = 2;
980	attsel1B = search(search, evaluation1B, train1);
981
982	stage = 3;
983	if (!attsel1A.toResultsString().equals(attsel1B.toResultsString())) {
984	if (m_Debug) {
985	println(
986	"\n=== Full report ===\n"
987	+ "\nFirst search\n"
988	+ attsel1A.toResultsString()
989	+ "\n\n");
990	println(
991	"\nSecond search\n"
992	+ attsel1B.toResultsString()
993	+ "\n\n");
994	}
995	throw new Exception("Results differ between search calls");
996	}
997	println("yes");
998	result[0] = true;
999
1000	if (false && m_Debug) {
1001	println(
1002	"\n=== Full report ===\n"
1003	+ "\nFirst search\n"
1004	+ evaluation1A.toString()
1005	+ "\n\n");
1006	println(
1007	"\nSecond search\n"
1008	+ evaluation1B.toString()
1009	+ "\n\n");
1010	}
1011	}
1012	catch (Exception ex) {
1013	println("no");
1014	result[0] = false;
1015	if (m_Debug) {
1016	println("\n=== Full Report ===");
1017	print("Problem during training");
1018	switch (stage) {
1019	case 0:
1020	print(" of dataset 1");
1021	break;
1022	case 1:
1023	print(" of dataset 2");
1024	break;
1025	case 2:
1026	print(" of dataset 1 (2nd build)");
1027	break;
1028	case 3:
1029	print(", comparing results from builds of dataset 1");
1030	break;
1031	}
1032	println(": " + ex.getMessage() + "\n");
1033	println("here are the datasets:\n");
1034	println("=== Train1 Dataset ===\n"
1035	+ train1.toString() + "\n");
1036	println("=== Train2 Dataset ===\n"
1037	+ train2.toString() + "\n");
1038	}
1039	}
1040
1041	return result;
1042	}
1043
1044	/**
1045	* Checks basic missing value handling of the scheme. If the missing
1046	* values cause an exception to be thrown by the scheme, this will be
1047	* recorded.
1048	*
1049	* @param nominalPredictor if true use nominal predictor attributes
1050	* @param numericPredictor if true use numeric predictor attributes
1051	* @param stringPredictor if true use string predictor attributes
1052	* @param datePredictor if true use date predictor attributes
1053	* @param relationalPredictor if true use relational predictor attributes
1054	* @param multiInstance whether multi-instance is needed
1055	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1056	* @param predictorMissing true if the missing values may be in
1057	* the predictors
1058	* @param classMissing true if the missing values may be in the class
1059	* @param missingLevel the percentage of missing values
1060	* @return index 0 is true if the test was passed, index 1 is true if test
1061	* was acceptable
1062	*/
1063	protected boolean[] canHandleMissing(
1064	boolean nominalPredictor,
1065	boolean numericPredictor,
1066	boolean stringPredictor,
1067	boolean datePredictor,
1068	boolean relationalPredictor,
1069	boolean multiInstance,
1070	int classType,
1071	boolean predictorMissing,
1072	boolean classMissing,
1073	int missingLevel) {
1074
1075	if (missingLevel == 100)
1076	print("100% ");
1077	print("missing");
1078	if (predictorMissing) {
1079	print(" predictor");
1080	if (classMissing)
1081	print(" and");
1082	}
1083	if (classMissing)
1084	print(" class");
1085	print(" values");
1086	printAttributeSummary(
1087	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1088	print("...");
1089	FastVector accepts = new FastVector();
1090	accepts.addElement("missing");
1091	accepts.addElement("value");
1092	accepts.addElement("train");
1093	accepts.addElement("no attributes");
1094	int numTrain = getNumInstances(), numClasses = 2;
1095
1096	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
1097	datePredictor, relationalPredictor,
1098	multiInstance,
1099	classType,
1100	missingLevel, predictorMissing, classMissing,
1101	numTrain, numClasses,
1102	accepts);
1103	}
1104
1105	/**
1106	* Checks whether the scheme can handle instance weights.
1107	* This test compares the scheme performance on two datasets
1108	* that are identical except for the training weights. If the
1109	* results change, then the scheme must be using the weights. It
1110	* may be possible to get a false positive from this test if the
1111	* weight changes aren't significant enough to induce a change
1112	* in scheme performance (but the weights are chosen to minimize
1113	* the likelihood of this).
1114	*
1115	* @param nominalPredictor if true use nominal predictor attributes
1116	* @param numericPredictor if true use numeric predictor attributes
1117	* @param stringPredictor if true use string predictor attributes
1118	* @param datePredictor if true use date predictor attributes
1119	* @param relationalPredictor if true use relational predictor attributes
1120	* @param multiInstance whether multi-instance is needed
1121	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1122	* @return index 0 true if the test was passed
1123	*/
1124	protected boolean[] instanceWeights(
1125	boolean nominalPredictor,
1126	boolean numericPredictor,
1127	boolean stringPredictor,
1128	boolean datePredictor,
1129	boolean relationalPredictor,
1130	boolean multiInstance,
1131	int classType) {
1132
1133	print("scheme uses instance weights");
1134	printAttributeSummary(
1135	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1136	print("...");
1137	int numTrain = 2*getNumInstances(),
1138	numClasses = 2, missingLevel = 0;
1139	boolean predictorMissing = false, classMissing = false;
1140
1141	boolean[] result = new boolean[2];
1142	Instances train = null;
1143	ASSearch[] search = null;
1144	ASEvaluation evaluationB = null;
1145	ASEvaluation evaluationI = null;
1146	AttributeSelection attselB = null;
1147	AttributeSelection attselI = null;
1148	boolean evalFail = false;
1149	try {
1150	train = makeTestDataset(42, numTrain,
1151	nominalPredictor ? getNumNominal() + 1 : 0,
1152	numericPredictor ? getNumNumeric() + 1 : 0,
1153	stringPredictor ? getNumString() : 0,
1154	datePredictor ? getNumDate() : 0,
1155	relationalPredictor ? getNumRelational() : 0,
1156	numClasses,
1157	classType,
1158	multiInstance);
1159	if (missingLevel > 0)
1160	addMissing(train, missingLevel, predictorMissing, classMissing);
1161	search = ASSearch.makeCopies(getSearch(), 2);
1162	evaluationB = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
1163	evaluationI = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
1164	attselB = search(search[0], evaluationB, train);
1165	} catch (Exception ex) {
1166	throw new Error("Error setting up for tests: " + ex.getMessage());
1167	}
1168	try {
1169
1170	// Now modify instance weights and re-built/test
1171	for (int i = 0; i < train.numInstances(); i++) {
1172	train.instance(i).setWeight(0);
1173	}
1174	Random random = new Random(1);
1175	for (int i = 0; i < train.numInstances() / 2; i++) {
1176	int inst = Math.abs(random.nextInt()) % train.numInstances();
1177	int weight = Math.abs(random.nextInt()) % 10 + 1;
1178	train.instance(inst).setWeight(weight);
1179	}
1180	attselI = search(search[1], evaluationI, train);
1181	if (attselB.toResultsString().equals(attselI.toResultsString())) {
1182	// println("no");
1183	evalFail = true;
1184	throw new Exception("evalFail");
1185	}
1186
1187	println("yes");
1188	result[0] = true;
1189	} catch (Exception ex) {
1190	println("no");
1191	result[0] = false;
1192
1193	if (m_Debug) {
1194	println("\n=== Full Report ===");
1195
1196	if (evalFail) {
1197	println("Results don't differ between non-weighted and "
1198	+ "weighted instance models.");
1199	println("Here are the results:\n");
1200	println("\nboth methods\n");
1201	println(evaluationB.toString());
1202	} else {
1203	print("Problem during training");
1204	println(": " + ex.getMessage() + "\n");
1205	}
1206	println("Here is the dataset:\n");
1207	println("=== Train Dataset ===\n"
1208	+ train.toString() + "\n");
1209	println("=== Train Weights ===\n");
1210	for (int i = 0; i < train.numInstances(); i++) {
1211	println(" " + (i + 1)
1212	+ " " + train.instance(i).weight());
1213	}
1214	}
1215	}
1216
1217	return result;
1218	}
1219
1220	/**
1221	* Checks whether the scheme alters the training dataset during
1222	* training. If the scheme needs to modify the training
1223	* data it should take a copy of the training data. Currently checks
1224	* for changes to header structure, number of instances, order of
1225	* instances, instance weights.
1226	*
1227	* @param nominalPredictor if true use nominal predictor attributes
1228	* @param numericPredictor if true use numeric predictor attributes
1229	* @param stringPredictor if true use string predictor attributes
1230	* @param datePredictor if true use date predictor attributes
1231	* @param relationalPredictor if true use relational predictor attributes
1232	* @param multiInstance whether multi-instance is needed
1233	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1234	* @param predictorMissing true if we know the scheme can handle
1235	* (at least) moderate missing predictor values
1236	* @param classMissing true if we know the scheme can handle
1237	* (at least) moderate missing class values
1238	* @return index 0 is true if the test was passed
1239	*/
1240	protected boolean[] datasetIntegrity(
1241	boolean nominalPredictor,
1242	boolean numericPredictor,
1243	boolean stringPredictor,
1244	boolean datePredictor,
1245	boolean relationalPredictor,
1246	boolean multiInstance,
1247	int classType,
1248	boolean predictorMissing,
1249	boolean classMissing) {
1250
1251	print("scheme doesn't alter original datasets");
1252	printAttributeSummary(
1253	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1254	print("...");
1255	int numTrain = getNumInstances(),
1256	numClasses = 2, missingLevel = 20;
1257
1258	boolean[] result = new boolean[2];
1259	Instances train = null;
1260	Instances trainCopy = null;
1261	ASSearch search = null;
1262	ASEvaluation evaluation = null;
1263	try {
1264	train = makeTestDataset(42, numTrain,
1265	nominalPredictor ? getNumNominal() : 0,
1266	numericPredictor ? getNumNumeric() : 0,
1267	stringPredictor ? getNumString() : 0,
1268	datePredictor ? getNumDate() : 0,
1269	relationalPredictor ? getNumRelational() : 0,
1270	numClasses,
1271	classType,
1272	multiInstance);
1273	if (missingLevel > 0)
1274	addMissing(train, missingLevel, predictorMissing, classMissing);
1275	search = ASSearch.makeCopies(getSearch(), 1)[0];
1276	evaluation = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
1277	trainCopy = new Instances(train);
1278	} catch (Exception ex) {
1279	throw new Error("Error setting up for tests: " + ex.getMessage());
1280	}
1281	try {
1282	search(search, evaluation, trainCopy);
1283	compareDatasets(train, trainCopy);
1284
1285	println("yes");
1286	result[0] = true;
1287	} catch (Exception ex) {
1288	println("no");
1289	result[0] = false;
1290
1291	if (m_Debug) {
1292	println("\n=== Full Report ===");
1293	print("Problem during training");
1294	println(": " + ex.getMessage() + "\n");
1295	println("Here are the datasets:\n");
1296	println("=== Train Dataset (original) ===\n"
1297	+ trainCopy.toString() + "\n");
1298	println("=== Train Dataset ===\n"
1299	+ train.toString() + "\n");
1300	}
1301	}
1302
1303	return result;
1304	}
1305
1306	/**
1307	* Runs a text on the datasets with the given characteristics.
1308	*
1309	* @param nominalPredictor if true use nominal predictor attributes
1310	* @param numericPredictor if true use numeric predictor attributes
1311	* @param stringPredictor if true use string predictor attributes
1312	* @param datePredictor if true use date predictor attributes
1313	* @param relationalPredictor if true use relational predictor attributes
1314	* @param multiInstance whether multi-instance is needed
1315	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1316	* @param missingLevel the percentage of missing values
1317	* @param predictorMissing true if the missing values may be in
1318	* the predictors
1319	* @param classMissing true if the missing values may be in the class
1320	* @param numTrain the number of instances in the training set
1321	* @param numClasses the number of classes
1322	* @param accepts the acceptable string in an exception
1323	* @return index 0 is true if the test was passed, index 1 is true if test
1324	* was acceptable
1325	*/
1326	protected boolean[] runBasicTest(boolean nominalPredictor,
1327	boolean numericPredictor,
1328	boolean stringPredictor,
1329	boolean datePredictor,
1330	boolean relationalPredictor,
1331	boolean multiInstance,
1332	int classType,
1333	int missingLevel,
1334	boolean predictorMissing,
1335	boolean classMissing,
1336	int numTrain,
1337	int numClasses,
1338	FastVector accepts) {
1339
1340	return runBasicTest(
1341	nominalPredictor,
1342	numericPredictor,
1343	stringPredictor,
1344	datePredictor,
1345	relationalPredictor,
1346	multiInstance,
1347	classType,
1348	TestInstances.CLASS_IS_LAST,
1349	missingLevel,
1350	predictorMissing,
1351	classMissing,
1352	numTrain,
1353	numClasses,
1354	accepts);
1355	}
1356
1357	/**
1358	* Runs a text on the datasets with the given characteristics.
1359	*
1360	* @param nominalPredictor if true use nominal predictor attributes
1361	* @param numericPredictor if true use numeric predictor attributes
1362	* @param stringPredictor if true use string predictor attributes
1363	* @param datePredictor if true use date predictor attributes
1364	* @param relationalPredictor if true use relational predictor attributes
1365	* @param multiInstance whether multi-instance is needed
1366	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1367	* @param classIndex the attribute index of the class
1368	* @param missingLevel the percentage of missing values
1369	* @param predictorMissing true if the missing values may be in
1370	* the predictors
1371	* @param classMissing true if the missing values may be in the class
1372	* @param numTrain the number of instances in the training set
1373	* @param numClasses the number of classes
1374	* @param accepts the acceptable string in an exception
1375	* @return index 0 is true if the test was passed, index 1 is true if test
1376	* was acceptable
1377	*/
1378	protected boolean[] runBasicTest(boolean nominalPredictor,
1379	boolean numericPredictor,
1380	boolean stringPredictor,
1381	boolean datePredictor,
1382	boolean relationalPredictor,
1383	boolean multiInstance,
1384	int classType,
1385	int classIndex,
1386	int missingLevel,
1387	boolean predictorMissing,
1388	boolean classMissing,
1389	int numTrain,
1390	int numClasses,
1391	FastVector accepts) {
1392
1393	boolean[] result = new boolean[2];
1394	Instances train = null;
1395	ASSearch search = null;
1396	ASEvaluation evaluation = null;
1397	try {
1398	train = makeTestDataset(42, numTrain,
1399	nominalPredictor ? getNumNominal() : 0,
1400	numericPredictor ? getNumNumeric() : 0,
1401	stringPredictor ? getNumString() : 0,
1402	datePredictor ? getNumDate() : 0,
1403	relationalPredictor ? getNumRelational() : 0,
1404	numClasses,
1405	classType,
1406	classIndex,
1407	multiInstance);
1408	if (missingLevel > 0)
1409	addMissing(train, missingLevel, predictorMissing, classMissing);
1410	search = ASSearch.makeCopies(getSearch(), 1)[0];
1411	evaluation = ASEvaluation.makeCopies(getEvaluator(), 1)[0];
1412	} catch (Exception ex) {
1413	ex.printStackTrace();
1414	throw new Error("Error setting up for tests: " + ex.getMessage());
1415	}
1416	try {
1417	search(search, evaluation, train);
1418	println("yes");
1419	result[0] = true;
1420	}
1421	catch (Exception ex) {
1422	boolean acceptable = false;
1423	String msg;
1424	if (ex.getMessage() == null)
1425	msg = "";
1426	else
1427	msg = ex.getMessage().toLowerCase();
1428	if (msg.indexOf("not in classpath") > -1)
1429	m_ClasspathProblems = true;
1430	for (int i = 0; i < accepts.size(); i++) {
1431	if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
1432	acceptable = true;
1433	}
1434	}
1435
1436	println("no" + (acceptable ? " (OK error message)" : ""));
1437	result[1] = acceptable;
1438
1439	if (m_Debug) {
1440	println("\n=== Full Report ===");
1441	print("Problem during training");
1442	println(": " + ex.getMessage() + "\n");
1443	if (!acceptable) {
1444	if (accepts.size() > 0) {
1445	print("Error message doesn't mention ");
1446	for (int i = 0; i < accepts.size(); i++) {
1447	if (i != 0) {
1448	print(" or ");
1449	}
1450	print('"' + (String)accepts.elementAt(i) + '"');
1451	}
1452	}
1453	println("here is the dataset:\n");
1454	println("=== Train Dataset ===\n"
1455	+ train.toString() + "\n");
1456	}
1457	}
1458	}
1459
1460	return result;
1461	}
1462
1463	/**
1464	* Make a simple set of instances, which can later be modified
1465	* for use in specific tests.
1466	*
1467	* @param seed the random number seed
1468	* @param numInstances the number of instances to generate
1469	* @param numNominal the number of nominal attributes
1470	* @param numNumeric the number of numeric attributes
1471	* @param numString the number of string attributes
1472	* @param numDate the number of date attributes
1473	* @param numRelational the number of relational attributes
1474	* @param numClasses the number of classes (if nominal class)
1475	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1476	* @param multiInstance whether the dataset should a multi-instance dataset
1477	* @return the test dataset
1478	* @throws Exception if the dataset couldn't be generated
1479	* @see #process(Instances)
1480	*/
1481	protected Instances makeTestDataset(int seed, int numInstances,
1482	int numNominal, int numNumeric,
1483	int numString, int numDate,
1484	int numRelational,
1485	int numClasses, int classType,
1486	boolean multiInstance)
1487	throws Exception {
1488
1489	return makeTestDataset(
1490	seed,
1491	numInstances,
1492	numNominal,
1493	numNumeric,
1494	numString,
1495	numDate,
1496	numRelational,
1497	numClasses,
1498	classType,
1499	TestInstances.CLASS_IS_LAST,
1500	multiInstance);
1501	}
1502
1503	/**
1504	* Make a simple set of instances with variable position of the class
1505	* attribute, which can later be modified for use in specific tests.
1506	*
1507	* @param seed the random number seed
1508	* @param numInstances the number of instances to generate
1509	* @param numNominal the number of nominal attributes
1510	* @param numNumeric the number of numeric attributes
1511	* @param numString the number of string attributes
1512	* @param numDate the number of date attributes
1513	* @param numRelational the number of relational attributes
1514	* @param numClasses the number of classes (if nominal class)
1515	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1516	* @param classIndex the index of the class (0-based, -1 as last)
1517	* @param multiInstance whether the dataset should a multi-instance dataset
1518	* @return the test dataset
1519	* @throws Exception if the dataset couldn't be generated
1520	* @see TestInstances#CLASS_IS_LAST
1521	* @see #process(Instances)
1522	*/
1523	protected Instances makeTestDataset(int seed, int numInstances,
1524	int numNominal, int numNumeric,
1525	int numString, int numDate,
1526	int numRelational,
1527	int numClasses, int classType,
1528	int classIndex,
1529	boolean multiInstance)
1530	throws Exception {
1531
1532	TestInstances dataset = new TestInstances();
1533
1534	dataset.setSeed(seed);
1535	dataset.setNumInstances(numInstances);
1536	dataset.setNumNominal(numNominal);
1537	dataset.setNumNumeric(numNumeric);
1538	dataset.setNumString(numString);
1539	dataset.setNumDate(numDate);
1540	dataset.setNumRelational(numRelational);
1541	dataset.setNumClasses(numClasses);
1542	dataset.setClassType(classType);
1543	dataset.setClassIndex(classIndex);
1544	dataset.setNumClasses(numClasses);
1545	dataset.setMultiInstance(multiInstance);
1546	dataset.setWords(getWords());
1547	dataset.setWordSeparators(getWordSeparators());
1548
1549	return process(dataset.generate());
1550	}
1551
1552	/**
1553	* Print out a short summary string for the dataset characteristics
1554	*
1555	* @param nominalPredictor true if nominal predictor attributes are present
1556	* @param numericPredictor true if numeric predictor attributes are present
1557	* @param stringPredictor true if string predictor attributes are present
1558	* @param datePredictor true if date predictor attributes are present
1559	* @param relationalPredictor true if relational predictor attributes are present
1560	* @param multiInstance whether multi-instance is needed
1561	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1562	*/
1563	protected void printAttributeSummary(boolean nominalPredictor,
1564	boolean numericPredictor,
1565	boolean stringPredictor,
1566	boolean datePredictor,
1567	boolean relationalPredictor,
1568	boolean multiInstance,
1569	int classType) {
1570
1571	String str = "";
1572
1573	if (numericPredictor)
1574	str += " numeric";
1575
1576	if (nominalPredictor) {
1577	if (str.length() > 0)
1578	str += " &";
1579	str += " nominal";
1580	}
1581
1582	if (stringPredictor) {
1583	if (str.length() > 0)
1584	str += " &";
1585	str += " string";
1586	}
1587
1588	if (datePredictor) {
1589	if (str.length() > 0)
1590	str += " &";
1591	str += " date";
1592	}
1593
1594	if (relationalPredictor) {
1595	if (str.length() > 0)
1596	str += " &";
1597	str += " relational";
1598	}
1599
1600	str += " predictors)";
1601
1602	switch (classType) {
1603	case Attribute.NUMERIC:
1604	str = " (numeric class," + str;
1605	break;
1606	case Attribute.NOMINAL:
1607	str = " (nominal class," + str;
1608	break;
1609	case Attribute.STRING:
1610	str = " (string class," + str;
1611	break;
1612	case Attribute.DATE:
1613	str = " (date class," + str;
1614	break;
1615	case Attribute.RELATIONAL:
1616	str = " (relational class," + str;
1617	break;
1618	}
1619
1620	print(str);
1621	}
1622
1623	/**
1624	* Returns the revision string.
1625	*
1626	* @return the revision
1627	*/
1628	public String getRevision() {
1629	return RevisionUtils.extract("$Revision: 4783 $");
1630	}
1631
1632	/**
1633	* Test method for this class
1634	*
1635	* @param args the commandline parameters
1636	*/
1637	public static void main(String [] args) {
1638	runCheck(new CheckAttributeSelection(), args);
1639	}
1640	}
1641

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/attributeSelection/CheckAttributeSelection.java @ 29

Download in other formats: