Context Navigation

source: src/main/java/weka/associations/CheckAssociator.java @ 13

Last change on this file since 13 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 54.4 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* CheckAssociator.java
19	* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.associations;
24
25	import weka.core.Attribute;
26	import weka.core.CheckScheme;
27	import weka.core.FastVector;
28	import weka.core.Instances;
29	import weka.core.MultiInstanceCapabilitiesHandler;
30	import weka.core.Option;
31	import weka.core.OptionHandler;
32	import weka.core.RevisionHandler;
33	import weka.core.RevisionUtils;
34	import weka.core.SerializationHelper;
35	import weka.core.TestInstances;
36	import weka.core.Utils;
37	import weka.core.WeightedInstancesHandler;
38
39	import java.util.Enumeration;
40	import java.util.Random;
41	import java.util.Vector;
42
43	/**
44	* Class for examining the capabilities and finding problems with
45	* associators. If you implement an associator using the WEKA libraries,
46	* you should run the checks on it to ensure robustness and correct
47	* operation. Passing all the tests of this object does not mean
48	* bugs in the associators don't exist, but this will help find some
49	* common ones. <p/>
50	*
51	* Typical usage: <p/>
52	* <code>java weka.associations.CheckAssociator -W associator_name
53	* -- associator_options </code><p/>
54	*
55	* CheckAssociator reports on the following:
56	* <ul>
57	* <li> Associator abilities
58	* <ul>
59	* <li> Possible command line options to the associators </li>
60	* <li> Whether the associators can predict nominal, numeric, string,
61	* date or relational class attributes. </li>
62	* <li> Whether the associators can handle numeric predictor attributes </li>
63	* <li> Whether the associators can handle nominal predictor attributes </li>
64	* <li> Whether the associators can handle string predictor attributes </li>
65	* <li> Whether the associators can handle date predictor attributes </li>
66	* <li> Whether the associators can handle relational predictor attributes </li>
67	* <li> Whether the associators can handle multi-instance data </li>
68	* <li> Whether the associators can handle missing predictor values </li>
69	* <li> Whether the associators can handle missing class values </li>
70	* <li> Whether a nominal associators only handles 2 class problems </li>
71	* <li> Whether the associators can handle instance weights </li>
72	* </ul>
73	* </li>
74	* <li> Correct functioning
75	* <ul>
76	* <li> Correct initialisation during buildAssociations (i.e. no result
77	* changes when buildAssociations called repeatedly) </li>
78	* <li> Whether the associator alters the data passed to it
79	* (number of instances, instance order, instance weights, etc) </li>
80	* </ul>
81	* </li>
82	* <li> Degenerate cases
83	* <ul>
84	* <li> building associators with zero training instances </li>
85	* <li> all but one predictor attribute values missing </li>
86	* <li> all predictor attribute values missing </li>
87	* <li> all but one class values missing </li>
88	* <li> all class values missing </li>
89	* </ul>
90	* </li>
91	* </ul>
92	* Running CheckAssociator with the debug option set will output the
93	* training dataset for any failed tests.<p/>
94	*
95	* The <code>weka.associations.AbstractAssociatorTest</code> uses this
96	* class to test all the associators. Any changes here, have to be
97	* checked in that abstract test class, too. <p/>
98	*
99	<!-- options-start -->
100	* Valid options are: <p/>
101	*
102	* <pre> -D
103	* Turn on debugging output.</pre>
104	*
105	* <pre> -S
106	* Silent mode - prints nothing to stdout.</pre>
107	*
108	* <pre> -N <num>
109	* The number of instances in the datasets (default 20).</pre>
110	*
111	* <pre> -nominal <num>
112	* The number of nominal attributes (default 2).</pre>
113	*
114	* <pre> -nominal-values <num>
115	* The number of values for nominal attributes (default 1).</pre>
116	*
117	* <pre> -numeric <num>
118	* The number of numeric attributes (default 1).</pre>
119	*
120	* <pre> -string <num>
121	* The number of string attributes (default 1).</pre>
122	*
123	* <pre> -date <num>
124	* The number of date attributes (default 1).</pre>
125	*
126	* <pre> -relational <num>
127	* The number of relational attributes (default 1).</pre>
128	*
129	* <pre> -num-instances-relational <num>
130	* The number of instances in relational/bag attributes (default 10).</pre>
131	*
132	* <pre> -words <comma-separated-list>
133	* The words to use in string attributes.</pre>
134	*
135	* <pre> -word-separators <chars>
136	* The word separators to use in string attributes.</pre>
137	*
138	* <pre> -W
139	* Full name of the associator analysed.
140	* eg: weka.associations.Apriori
141	* (default weka.associations.Apriori)</pre>
142	*
143	* <pre>
144	* Options specific to associator weka.associations.Apriori:
145	* </pre>
146	*
147	* <pre> -N <required number of rules output>
148	* The required number of rules. (default = 10)</pre>
149	*
150	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
151	* The metric type by which to rank rules. (default = confidence)</pre>
152	*
153	* <pre> -C <minimum metric score of a rule>
154	* The minimum confidence of a rule. (default = 0.9)</pre>
155	*
156	* <pre> -D <delta for minimum support>
157	* The delta by which the minimum support is decreased in
158	* each iteration. (default = 0.05)</pre>
159	*
160	* <pre> -U <upper bound for minimum support>
161	* Upper bound for minimum support. (default = 1.0)</pre>
162	*
163	* <pre> -M <lower bound for minimum support>
164	* The lower bound for the minimum support. (default = 0.1)</pre>
165	*
166	* <pre> -S <significance level>
167	* If used, rules are tested for significance at
168	* the given level. Slower. (default = no significance testing)</pre>
169	*
170	* <pre> -I
171	* If set the itemsets found are also output. (default = no)</pre>
172	*
173	* <pre> -R
174	* Remove columns that contain all missing values (default = no)</pre>
175	*
176	* <pre> -V
177	* Report progress iteratively. (default = no)</pre>
178	*
179	* <pre> -A
180	* If set class association rules are mined. (default = no)</pre>
181	*
182	* <pre> -c <the class index>
183	* The class index. (default = last)</pre>
184	*
185	<!-- options-end -->
186	*
187	* Options after -- are passed to the designated associator.<p/>
188	*
189	* @author Len Trigg (trigg@cs.waikato.ac.nz)
190	* @author FracPete (fracpete at waikato dot ac dot nz)
191	* @version $Revision: 1.7 $
192	* @see TestInstances
193	*/
194	public class CheckAssociator
195	extends CheckScheme
196	implements RevisionHandler {
197
198	/*
199	* Note about test methods:
200	* - methods return array of booleans
201	* - first index: success or not
202	* - second index: acceptable or not (e.g., Exception is OK)
203	*
204	* FracPete (fracpete at waikato dot ac dot nz)
205	*/
206
207	/** a "dummy" class type */
208	public final static int NO_CLASS = -1;
209
210	/** The associator to be examined */
211	protected Associator m_Associator = new weka.associations.Apriori();
212
213	/**
214	* Returns an enumeration describing the available options.
215	*
216	* @return an enumeration of all the available options.
217	*/
218	public Enumeration listOptions() {
219	Vector result = new Vector();
220
221	Enumeration en = super.listOptions();
222	while (en.hasMoreElements())
223	result.addElement(en.nextElement());
224
225	result.addElement(new Option(
226	"\tFull name of the associator analysed.\n"
227	+"\teg: weka.associations.Apriori\n"
228	+ "\t(default weka.associations.Apriori)",
229	"W", 1, "-W"));
230
231	if ((m_Associator != null)
232	&& (m_Associator instanceof OptionHandler)) {
233	result.addElement(new Option("", "", 0,
234	"\nOptions specific to associator "
235	+ m_Associator.getClass().getName()
236	+ ":"));
237	Enumeration enu = ((OptionHandler)m_Associator).listOptions();
238	while (enu.hasMoreElements())
239	result.addElement(enu.nextElement());
240	}
241
242	return result.elements();
243	}
244
245	/**
246	* Parses a given list of options.
247	*
248	<!-- options-start -->
249	* Valid options are: <p/>
250	*
251	* <pre> -D
252	* Turn on debugging output.</pre>
253	*
254	* <pre> -S
255	* Silent mode - prints nothing to stdout.</pre>
256	*
257	* <pre> -N <num>
258	* The number of instances in the datasets (default 20).</pre>
259	*
260	* <pre> -nominal <num>
261	* The number of nominal attributes (default 2).</pre>
262	*
263	* <pre> -nominal-values <num>
264	* The number of values for nominal attributes (default 1).</pre>
265	*
266	* <pre> -numeric <num>
267	* The number of numeric attributes (default 1).</pre>
268	*
269	* <pre> -string <num>
270	* The number of string attributes (default 1).</pre>
271	*
272	* <pre> -date <num>
273	* The number of date attributes (default 1).</pre>
274	*
275	* <pre> -relational <num>
276	* The number of relational attributes (default 1).</pre>
277	*
278	* <pre> -num-instances-relational <num>
279	* The number of instances in relational/bag attributes (default 10).</pre>
280	*
281	* <pre> -words <comma-separated-list>
282	* The words to use in string attributes.</pre>
283	*
284	* <pre> -word-separators <chars>
285	* The word separators to use in string attributes.</pre>
286	*
287	* <pre> -W
288	* Full name of the associator analysed.
289	* eg: weka.associations.Apriori
290	* (default weka.associations.Apriori)</pre>
291	*
292	* <pre>
293	* Options specific to associator weka.associations.Apriori:
294	* </pre>
295	*
296	* <pre> -N <required number of rules output>
297	* The required number of rules. (default = 10)</pre>
298	*
299	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
300	* The metric type by which to rank rules. (default = confidence)</pre>
301	*
302	* <pre> -C <minimum metric score of a rule>
303	* The minimum confidence of a rule. (default = 0.9)</pre>
304	*
305	* <pre> -D <delta for minimum support>
306	* The delta by which the minimum support is decreased in
307	* each iteration. (default = 0.05)</pre>
308	*
309	* <pre> -U <upper bound for minimum support>
310	* Upper bound for minimum support. (default = 1.0)</pre>
311	*
312	* <pre> -M <lower bound for minimum support>
313	* The lower bound for the minimum support. (default = 0.1)</pre>
314	*
315	* <pre> -S <significance level>
316	* If used, rules are tested for significance at
317	* the given level. Slower. (default = no significance testing)</pre>
318	*
319	* <pre> -I
320	* If set the itemsets found are also output. (default = no)</pre>
321	*
322	* <pre> -R
323	* Remove columns that contain all missing values (default = no)</pre>
324	*
325	* <pre> -V
326	* Report progress iteratively. (default = no)</pre>
327	*
328	* <pre> -A
329	* If set class association rules are mined. (default = no)</pre>
330	*
331	* <pre> -c <the class index>
332	* The class index. (default = last)</pre>
333	*
334	<!-- options-end -->
335	*
336	* @param options the list of options as an array of strings
337	* @throws Exception if an option is not supported
338	*/
339	public void setOptions(String[] options) throws Exception {
340	String tmpStr;
341
342	super.setOptions(options);
343
344	tmpStr = Utils.getOption('W', options);
345	if (tmpStr.length() == 0)
346	tmpStr = weka.associations.Apriori.class.getName();
347	setAssociator(
348	(Associator) forName(
349	"weka.associations",
350	Associator.class,
351	tmpStr,
352	Utils.partitionOptions(options)));
353	}
354
355	/**
356	* Gets the current settings of the CheckAssociator.
357	*
358	* @return an array of strings suitable for passing to setOptions
359	*/
360	public String[] getOptions() {
361	Vector result;
362	String[] options;
363	int i;
364
365	result = new Vector();
366
367	options = super.getOptions();
368	for (i = 0; i < options.length; i++)
369	result.add(options[i]);
370
371	if (getAssociator() != null) {
372	result.add("-W");
373	result.add(getAssociator().getClass().getName());
374	}
375
376	if ((m_Associator != null) && (m_Associator instanceof OptionHandler))
377	options = ((OptionHandler) m_Associator).getOptions();
378	else
379	options = new String[0];
380
381	if (options.length > 0) {
382	result.add("--");
383	for (i = 0; i < options.length; i++)
384	result.add(options[i]);
385	}
386
387	return (String[]) result.toArray(new String[result.size()]);
388	}
389
390	/**
391	* Begin the tests, reporting results to System.out
392	*/
393	public void doTests() {
394
395	if (getAssociator() == null) {
396	println("\n=== No associator set ===");
397	return;
398	}
399	println("\n=== Check on Associator: "
400	+ getAssociator().getClass().getName()
401	+ " ===\n");
402
403	// Start tests
404	m_ClasspathProblems = false;
405	println("--> Checking for interfaces");
406	canTakeOptions();
407	boolean weightedInstancesHandler = weightedInstancesHandler()[0];
408	boolean multiInstanceHandler = multiInstanceHandler()[0];
409	println("--> Associator tests");
410	declaresSerialVersionUID();
411	println("--> no class attribute");
412	testsWithoutClass(weightedInstancesHandler, multiInstanceHandler);
413	println("--> with class attribute");
414	testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler, multiInstanceHandler);
415	testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler, multiInstanceHandler);
416	testsPerClassType(Attribute.DATE, weightedInstancesHandler, multiInstanceHandler);
417	testsPerClassType(Attribute.STRING, weightedInstancesHandler, multiInstanceHandler);
418	testsPerClassType(Attribute.RELATIONAL, weightedInstancesHandler, multiInstanceHandler);
419	}
420
421	/**
422	* Set the associator to test.
423	*
424	* @param newAssociator the Associator to use.
425	*/
426	public void setAssociator(Associator newAssociator) {
427	m_Associator = newAssociator;
428	}
429
430	/**
431	* Get the associator being tested
432	*
433	* @return the associator being tested
434	*/
435	public Associator getAssociator() {
436	return m_Associator;
437	}
438
439	/**
440	* Run a battery of tests for a given class attribute type
441	*
442	* @param classType the class type (NOMINAL, NUMERIC, etc.)
443	* @param weighted true if the associator says it handles weights
444	* @param multiInstance true if the associator is a multi-instance associator
445	*/
446	protected void testsPerClassType(int classType,
447	boolean weighted,
448	boolean multiInstance) {
449
450	boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0];
451	boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0];
452	boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0];
453	boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0];
454	boolean PRel;
455	if (!multiInstance)
456	PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0];
457	else
458	PRel = false;
459
460	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
461	if (weighted)
462	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
463
464	if (classType == Attribute.NOMINAL)
465	canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4);
466
467	if (!multiInstance) {
468	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0);
469	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1);
470	}
471
472	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
473	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
474	multiInstance, classType,
475	true, false, 20)[0];
476	if (handleMissingPredictors)
477	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100);
478
479	boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
480	multiInstance, classType,
481	false, true, 20)[0];
482	if (handleMissingClass)
483	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100);
484
485	correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
486	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType,
487	handleMissingPredictors, handleMissingClass);
488	}
489	}
490
491	/**
492	* Run a battery of tests without a class
493	*
494	* @param weighted true if the associator says it handles weights
495	* @param multiInstance true if the associator is a multi-instance associator
496	*/
497	protected void testsWithoutClass(boolean weighted,
498	boolean multiInstance) {
499
500	boolean PNom = canPredict(true, false, false, false, false, multiInstance, NO_CLASS)[0];
501	boolean PNum = canPredict(false, true, false, false, false, multiInstance, NO_CLASS)[0];
502	boolean PStr = canPredict(false, false, true, false, false, multiInstance, NO_CLASS)[0];
503	boolean PDat = canPredict(false, false, false, true, false, multiInstance, NO_CLASS)[0];
504	boolean PRel;
505	if (!multiInstance)
506	PRel = canPredict(false, false, false, false, true, multiInstance, NO_CLASS)[0];
507	else
508	PRel = false;
509
510	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
511	if (weighted)
512	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
513
514	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
515	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
516	multiInstance, NO_CLASS,
517	true, false, 20)[0];
518	if (handleMissingPredictors)
519	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS, true, false, 100);
520
521	correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
522	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS,
523	handleMissingPredictors, false);
524	}
525	}
526
527	/**
528	* Checks whether the scheme can take command line options.
529	*
530	* @return index 0 is true if the associator can take options
531	*/
532	protected boolean[] canTakeOptions() {
533
534	boolean[] result = new boolean[2];
535
536	print("options...");
537	if (m_Associator instanceof OptionHandler) {
538	println("yes");
539	if (m_Debug) {
540	println("\n=== Full report ===");
541	Enumeration enu = ((OptionHandler)m_Associator).listOptions();
542	while (enu.hasMoreElements()) {
543	Option option = (Option) enu.nextElement();
544	print(option.synopsis() + "\n"
545	+ option.description() + "\n");
546	}
547	println("\n");
548	}
549	result[0] = true;
550	}
551	else {
552	println("no");
553	result[0] = false;
554	}
555
556	return result;
557	}
558
559	/**
560	* Checks whether the scheme says it can handle instance weights.
561	*
562	* @return index 0 is true if the associator handles instance weights
563	*/
564	protected boolean[] weightedInstancesHandler() {
565
566	boolean[] result = new boolean[2];
567
568	print("weighted instances associator...");
569	if (m_Associator instanceof WeightedInstancesHandler) {
570	println("yes");
571	result[0] = true;
572	}
573	else {
574	println("no");
575	result[0] = false;
576	}
577
578	return result;
579	}
580
581	/**
582	* Checks whether the scheme handles multi-instance data.
583	*
584	* @return index 0 is true if the associator handles multi-instance data
585	*/
586	protected boolean[] multiInstanceHandler() {
587	boolean[] result = new boolean[2];
588
589	print("multi-instance associator...");
590	if (m_Associator instanceof MultiInstanceCapabilitiesHandler) {
591	println("yes");
592	result[0] = true;
593	}
594	else {
595	println("no");
596	result[0] = false;
597	}
598
599	return result;
600	}
601
602	/**
603	* tests for a serialVersionUID. Fails in case the scheme doesn't declare
604	* a UID.
605	*
606	* @return index 0 is true if the scheme declares a UID
607	*/
608	protected boolean[] declaresSerialVersionUID() {
609	boolean[] result = new boolean[2];
610
611	print("serialVersionUID...");
612
613	result[0] = !SerializationHelper.needsUID(m_Associator.getClass());
614
615	if (result[0])
616	println("yes");
617	else
618	println("no");
619
620	return result;
621	}
622
623	/**
624	* Checks basic prediction of the scheme, for simple non-troublesome
625	* datasets.
626	*
627	* @param nominalPredictor if true use nominal predictor attributes
628	* @param numericPredictor if true use numeric predictor attributes
629	* @param stringPredictor if true use string predictor attributes
630	* @param datePredictor if true use date predictor attributes
631	* @param relationalPredictor if true use relational predictor attributes
632	* @param multiInstance whether multi-instance is needed
633	* @param classType the class type (NOMINAL, NUMERIC, etc.)
634	* @return index 0 is true if the test was passed, index 1 is true if test
635	* was acceptable
636	*/
637	protected boolean[] canPredict(
638	boolean nominalPredictor,
639	boolean numericPredictor,
640	boolean stringPredictor,
641	boolean datePredictor,
642	boolean relationalPredictor,
643	boolean multiInstance,
644	int classType) {
645
646	print("basic predict");
647	printAttributeSummary(
648	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
649	print("...");
650	FastVector accepts = new FastVector();
651	accepts.addElement("any");
652	accepts.addElement("unary");
653	accepts.addElement("binary");
654	accepts.addElement("nominal");
655	accepts.addElement("numeric");
656	accepts.addElement("string");
657	accepts.addElement("date");
658	accepts.addElement("relational");
659	accepts.addElement("multi-instance");
660	accepts.addElement("not in classpath");
661	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
662	boolean predictorMissing = false, classMissing = false;
663
664	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
665	datePredictor, relationalPredictor,
666	multiInstance,
667	classType,
668	missingLevel, predictorMissing, classMissing,
669	numTrain, numClasses,
670	accepts);
671	}
672
673	/**
674	* Checks whether nominal schemes can handle more than two classes.
675	* If a scheme is only designed for two-class problems it should
676	* throw an appropriate exception for multi-class problems.
677	*
678	* @param nominalPredictor if true use nominal predictor attributes
679	* @param numericPredictor if true use numeric predictor attributes
680	* @param stringPredictor if true use string predictor attributes
681	* @param datePredictor if true use date predictor attributes
682	* @param relationalPredictor if true use relational predictor attributes
683	* @param multiInstance whether multi-instance is needed
684	* @param numClasses the number of classes to test
685	* @return index 0 is true if the test was passed, index 1 is true if test
686	* was acceptable
687	*/
688	protected boolean[] canHandleNClasses(
689	boolean nominalPredictor,
690	boolean numericPredictor,
691	boolean stringPredictor,
692	boolean datePredictor,
693	boolean relationalPredictor,
694	boolean multiInstance,
695	int numClasses) {
696
697	print("more than two class problems");
698	printAttributeSummary(
699	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL);
700	print("...");
701	FastVector accepts = new FastVector();
702	accepts.addElement("number");
703	accepts.addElement("class");
704	int numTrain = getNumInstances(), missingLevel = 0;
705	boolean predictorMissing = false, classMissing = false;
706
707	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
708	datePredictor, relationalPredictor,
709	multiInstance,
710	Attribute.NOMINAL,
711	missingLevel, predictorMissing, classMissing,
712	numTrain, numClasses,
713	accepts);
714	}
715
716	/**
717	* Checks whether the scheme can handle class attributes as Nth attribute.
718	*
719	* @param nominalPredictor if true use nominal predictor attributes
720	* @param numericPredictor if true use numeric predictor attributes
721	* @param stringPredictor if true use string predictor attributes
722	* @param datePredictor if true use date predictor attributes
723	* @param relationalPredictor if true use relational predictor attributes
724	* @param multiInstance whether multi-instance is needed
725	* @param classType the class type (NUMERIC, NOMINAL, etc.)
726	* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
727	* @return index 0 is true if the test was passed, index 1 is true if test
728	* was acceptable
729	* @see TestInstances#CLASS_IS_LAST
730	*/
731	protected boolean[] canHandleClassAsNthAttribute(
732	boolean nominalPredictor,
733	boolean numericPredictor,
734	boolean stringPredictor,
735	boolean datePredictor,
736	boolean relationalPredictor,
737	boolean multiInstance,
738	int classType,
739	int classIndex) {
740
741	if (classIndex == TestInstances.CLASS_IS_LAST)
742	print("class attribute as last attribute");
743	else
744	print("class attribute as " + (classIndex + 1) + ". attribute");
745	printAttributeSummary(
746	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
747	print("...");
748	FastVector accepts = new FastVector();
749	int numTrain = getNumInstances(), numClasses = 2,
750	missingLevel = 0;
751	boolean predictorMissing = false, classMissing = false;
752
753	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
754	datePredictor, relationalPredictor,
755	multiInstance,
756	classType,
757	classIndex,
758	missingLevel, predictorMissing, classMissing,
759	numTrain, numClasses,
760	accepts);
761	}
762
763	/**
764	* Checks whether the scheme can handle zero training instances.
765	*
766	* @param nominalPredictor if true use nominal predictor attributes
767	* @param numericPredictor if true use numeric predictor attributes
768	* @param stringPredictor if true use string predictor attributes
769	* @param datePredictor if true use date predictor attributes
770	* @param relationalPredictor if true use relational predictor attributes
771	* @param multiInstance whether multi-instance is needed
772	* @param classType the class type (NUMERIC, NOMINAL, etc.)
773	* @return index 0 is true if the test was passed, index 1 is true if test
774	* was acceptable
775	*/
776	protected boolean[] canHandleZeroTraining(
777	boolean nominalPredictor,
778	boolean numericPredictor,
779	boolean stringPredictor,
780	boolean datePredictor,
781	boolean relationalPredictor,
782	boolean multiInstance,
783	int classType) {
784
785	print("handle zero training instances");
786	printAttributeSummary(
787	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
788	print("...");
789	FastVector accepts = new FastVector();
790	accepts.addElement("train");
791	accepts.addElement("value");
792	int numTrain = 0, numClasses = 2, missingLevel = 0;
793	boolean predictorMissing = false, classMissing = false;
794
795	return runBasicTest(
796	nominalPredictor, numericPredictor, stringPredictor,
797	datePredictor, relationalPredictor,
798	multiInstance,
799	classType,
800	missingLevel, predictorMissing, classMissing,
801	numTrain, numClasses,
802	accepts);
803	}
804
805	/**
806	* Checks whether the scheme correctly initialises models when
807	* buildAssociations is called. This test calls buildAssociations with
808	* one training dataset. buildAssociations is then called on a training
809	* set with different structure, and then again with the original training
810	* set. If the equals method of the AssociatorEvaluation class returns
811	* false, this is noted as incorrect build initialisation.
812	*
813	* @param nominalPredictor if true use nominal predictor attributes
814	* @param numericPredictor if true use numeric predictor attributes
815	* @param stringPredictor if true use string predictor attributes
816	* @param datePredictor if true use date predictor attributes
817	* @param relationalPredictor if true use relational predictor attributes
818	* @param multiInstance whether multi-instance is needed
819	* @param classType the class type (NUMERIC, NOMINAL, etc.)
820	* @return index 0 is true if the test was passed
821	*/
822	protected boolean[] correctBuildInitialisation(
823	boolean nominalPredictor,
824	boolean numericPredictor,
825	boolean stringPredictor,
826	boolean datePredictor,
827	boolean relationalPredictor,
828	boolean multiInstance,
829	int classType) {
830
831	boolean[] result = new boolean[2];
832
833	print("correct initialisation during buildAssociations");
834	printAttributeSummary(
835	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
836	print("...");
837	int numTrain = getNumInstances(),
838	numClasses = 2, missingLevel = 0;
839	boolean predictorMissing = false, classMissing = false;
840
841	Instances train1 = null;
842	Instances train2 = null;
843	Associator associator = null;
844	AssociatorEvaluation evaluation1A = null;
845	AssociatorEvaluation evaluation1B = null;
846	AssociatorEvaluation evaluation2 = null;
847	int stage = 0;
848	try {
849
850	// Make two train sets with different numbers of attributes
851	train1 = makeTestDataset(42, numTrain,
852	nominalPredictor ? getNumNominal() : 0,
853	numericPredictor ? getNumNumeric() : 0,
854	stringPredictor ? getNumString() : 0,
855	datePredictor ? getNumDate() : 0,
856	relationalPredictor ? getNumRelational() : 0,
857	numClasses,
858	classType,
859	multiInstance);
860	train2 = makeTestDataset(84, numTrain,
861	nominalPredictor ? getNumNominal() + 1 : 0,
862	numericPredictor ? getNumNumeric() + 1 : 0,
863	stringPredictor ? getNumString() + 1 : 0,
864	datePredictor ? getNumDate() + 1 : 0,
865	relationalPredictor ? getNumRelational() + 1 : 0,
866	numClasses,
867	classType,
868	multiInstance);
869	if (missingLevel > 0) {
870	addMissing(train1, missingLevel, predictorMissing, classMissing);
871	addMissing(train2, missingLevel, predictorMissing, classMissing);
872	}
873
874	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
875	evaluation1A = new AssociatorEvaluation();
876	evaluation1B = new AssociatorEvaluation();
877	evaluation2 = new AssociatorEvaluation();
878	} catch (Exception ex) {
879	throw new Error("Error setting up for tests: " + ex.getMessage());
880	}
881	try {
882	stage = 0;
883	evaluation1A.evaluate(associator, train1);
884
885	stage = 1;
886	evaluation2.evaluate(associator, train2);
887
888	stage = 2;
889	evaluation1B.evaluate(associator, train1);
890
891	stage = 3;
892	if (!evaluation1A.equals(evaluation1B)) {
893	if (m_Debug) {
894	println("\n=== Full report ===\n"
895	+ evaluation1A.toSummaryString("\nFirst buildAssociations()")
896	+ "\n\n");
897	println(
898	evaluation1B.toSummaryString("\nSecond buildAssociations()")
899	+ "\n\n");
900	}
901	throw new Exception("Results differ between buildAssociations calls");
902	}
903	println("yes");
904	result[0] = true;
905
906	if (false && m_Debug) {
907	println("\n=== Full report ===\n"
908	+ evaluation1A.toSummaryString("\nFirst buildAssociations()")
909	+ "\n\n");
910	println(
911	evaluation1B.toSummaryString("\nSecond buildAssociations()")
912	+ "\n\n");
913	}
914	}
915	catch (Exception ex) {
916	println("no");
917	result[0] = false;
918
919	if (m_Debug) {
920	println("\n=== Full Report ===");
921	print("Problem during building");
922	switch (stage) {
923	case 0:
924	print(" of dataset 1");
925	break;
926	case 1:
927	print(" of dataset 2");
928	break;
929	case 2:
930	print(" of dataset 1 (2nd build)");
931	break;
932	case 3:
933	print(", comparing results from builds of dataset 1");
934	break;
935	}
936	println(": " + ex.getMessage() + "\n");
937	println("here are the datasets:\n");
938	println("=== Train1 Dataset ===\n"
939	+ train1.toString() + "\n");
940	println("=== Train2 Dataset ===\n"
941	+ train2.toString() + "\n");
942	}
943	}
944
945	return result;
946	}
947
948	/**
949	* Checks basic missing value handling of the scheme. If the missing
950	* values cause an exception to be thrown by the scheme, this will be
951	* recorded.
952	*
953	* @param nominalPredictor if true use nominal predictor attributes
954	* @param numericPredictor if true use numeric predictor attributes
955	* @param stringPredictor if true use string predictor attributes
956	* @param datePredictor if true use date predictor attributes
957	* @param relationalPredictor if true use relational predictor attributes
958	* @param multiInstance whether multi-instance is needed
959	* @param classType the class type (NUMERIC, NOMINAL, etc.)
960	* @param predictorMissing true if the missing values may be in
961	* the predictors
962	* @param classMissing true if the missing values may be in the class
963	* @param missingLevel the percentage of missing values
964	* @return index 0 is true if the test was passed, index 1 is true if test
965	* was acceptable
966	*/
967	protected boolean[] canHandleMissing(
968	boolean nominalPredictor,
969	boolean numericPredictor,
970	boolean stringPredictor,
971	boolean datePredictor,
972	boolean relationalPredictor,
973	boolean multiInstance,
974	int classType,
975	boolean predictorMissing,
976	boolean classMissing,
977	int missingLevel) {
978
979	if (missingLevel == 100)
980	print("100% ");
981	print("missing");
982	if (predictorMissing) {
983	print(" predictor");
984	if (classMissing)
985	print(" and");
986	}
987	if (classMissing)
988	print(" class");
989	print(" values");
990	printAttributeSummary(
991	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
992	print("...");
993	FastVector accepts = new FastVector();
994	accepts.addElement("missing");
995	accepts.addElement("value");
996	accepts.addElement("train");
997	int numTrain = getNumInstances(), numClasses = 2;
998
999	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
1000	datePredictor, relationalPredictor,
1001	multiInstance,
1002	classType,
1003	missingLevel, predictorMissing, classMissing,
1004	numTrain, numClasses,
1005	accepts);
1006	}
1007
1008	/**
1009	* Checks whether the associator can handle instance weights.
1010	* This test compares the associator performance on two datasets
1011	* that are identical except for the training weights. If the
1012	* results change, then the associator must be using the weights. It
1013	* may be possible to get a false positive from this test if the
1014	* weight changes aren't significant enough to induce a change
1015	* in associator performance (but the weights are chosen to minimize
1016	* the likelihood of this).
1017	*
1018	* @param nominalPredictor if true use nominal predictor attributes
1019	* @param numericPredictor if true use numeric predictor attributes
1020	* @param stringPredictor if true use string predictor attributes
1021	* @param datePredictor if true use date predictor attributes
1022	* @param relationalPredictor if true use relational predictor attributes
1023	* @param multiInstance whether multi-instance is needed
1024	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1025	* @return index 0 true if the test was passed
1026	*/
1027	protected boolean[] instanceWeights(
1028	boolean nominalPredictor,
1029	boolean numericPredictor,
1030	boolean stringPredictor,
1031	boolean datePredictor,
1032	boolean relationalPredictor,
1033	boolean multiInstance,
1034	int classType) {
1035
1036	print("associator uses instance weights");
1037	printAttributeSummary(
1038	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1039	print("...");
1040	int numTrain = 2*getNumInstances(),
1041	numClasses = 2, missingLevel = 0;
1042	boolean predictorMissing = false, classMissing = false;
1043
1044	boolean[] result = new boolean[2];
1045	Instances train = null;
1046	Associator [] associators = null;
1047	AssociatorEvaluation evaluationB = null;
1048	AssociatorEvaluation evaluationI = null;
1049	boolean evalFail = false;
1050	try {
1051	train = makeTestDataset(42, numTrain,
1052	nominalPredictor ? getNumNominal() + 1 : 0,
1053	numericPredictor ? getNumNumeric() + 1 : 0,
1054	stringPredictor ? getNumString() : 0,
1055	datePredictor ? getNumDate() : 0,
1056	relationalPredictor ? getNumRelational() : 0,
1057	numClasses,
1058	classType,
1059	multiInstance);
1060	if (missingLevel > 0)
1061	addMissing(train, missingLevel, predictorMissing, classMissing);
1062	associators = AbstractAssociator.makeCopies(getAssociator(), 2);
1063	evaluationB = new AssociatorEvaluation();
1064	evaluationI = new AssociatorEvaluation();
1065	evaluationB.evaluate(associators[0], train);
1066	} catch (Exception ex) {
1067	throw new Error("Error setting up for tests: " + ex.getMessage());
1068	}
1069	try {
1070
1071	// Now modify instance weights and re-built/test
1072	for (int i = 0; i < train.numInstances(); i++) {
1073	train.instance(i).setWeight(0);
1074	}
1075	Random random = new Random(1);
1076	for (int i = 0; i < train.numInstances() / 2; i++) {
1077	int inst = Math.abs(random.nextInt()) % train.numInstances();
1078	int weight = Math.abs(random.nextInt()) % 10 + 1;
1079	train.instance(inst).setWeight(weight);
1080	}
1081	evaluationI.evaluate(associators[1], train);
1082	if (evaluationB.equals(evaluationI)) {
1083	// println("no");
1084	evalFail = true;
1085	throw new Exception("evalFail");
1086	}
1087
1088	println("yes");
1089	result[0] = true;
1090	} catch (Exception ex) {
1091	println("no");
1092	result[0] = false;
1093
1094	if (m_Debug) {
1095	println("\n=== Full Report ===");
1096
1097	if (evalFail) {
1098	println("Results don't differ between non-weighted and "
1099	+ "weighted instance models.");
1100	println("Here are the results:\n");
1101	println(evaluationB.toSummaryString("\nboth methods\n"));
1102	} else {
1103	print("Problem during building");
1104	println(": " + ex.getMessage() + "\n");
1105	}
1106	println("Here is the dataset:\n");
1107	println("=== Train Dataset ===\n"
1108	+ train.toString() + "\n");
1109	println("=== Train Weights ===\n");
1110	for (int i = 0; i < train.numInstances(); i++) {
1111	println(" " + (i + 1)
1112	+ " " + train.instance(i).weight());
1113	}
1114	}
1115	}
1116
1117	return result;
1118	}
1119
1120	/**
1121	* Checks whether the scheme alters the training dataset during
1122	* building. If the scheme needs to modify the data it should take
1123	* a copy of the training data. Currently checks for changes to header
1124	* structure, number of instances, order of instances, instance weights.
1125	*
1126	* @param nominalPredictor if true use nominal predictor attributes
1127	* @param numericPredictor if true use numeric predictor attributes
1128	* @param stringPredictor if true use string predictor attributes
1129	* @param datePredictor if true use date predictor attributes
1130	* @param relationalPredictor if true use relational predictor attributes
1131	* @param multiInstance whether multi-instance is needed
1132	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1133	* @param predictorMissing true if we know the associator can handle
1134	* (at least) moderate missing predictor values
1135	* @param classMissing true if we know the associator can handle
1136	* (at least) moderate missing class values
1137	* @return index 0 is true if the test was passed
1138	*/
1139	protected boolean[] datasetIntegrity(
1140	boolean nominalPredictor,
1141	boolean numericPredictor,
1142	boolean stringPredictor,
1143	boolean datePredictor,
1144	boolean relationalPredictor,
1145	boolean multiInstance,
1146	int classType,
1147	boolean predictorMissing,
1148	boolean classMissing) {
1149
1150	print("associator doesn't alter original datasets");
1151	printAttributeSummary(
1152	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1153	print("...");
1154	int numTrain = getNumInstances(),
1155	numClasses = 2, missingLevel = 20;
1156
1157	boolean[] result = new boolean[2];
1158	Instances train = null;
1159	Associator associator = null;
1160	try {
1161	train = makeTestDataset(42, numTrain,
1162	nominalPredictor ? getNumNominal() : 0,
1163	numericPredictor ? getNumNumeric() : 0,
1164	stringPredictor ? getNumString() : 0,
1165	datePredictor ? getNumDate() : 0,
1166	relationalPredictor ? getNumRelational() : 0,
1167	numClasses,
1168	classType,
1169	multiInstance);
1170	if (missingLevel > 0)
1171	addMissing(train, missingLevel, predictorMissing, classMissing);
1172	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
1173	} catch (Exception ex) {
1174	throw new Error("Error setting up for tests: " + ex.getMessage());
1175	}
1176	try {
1177	Instances trainCopy = new Instances(train);
1178	associator.buildAssociations(trainCopy);
1179	compareDatasets(train, trainCopy);
1180
1181	println("yes");
1182	result[0] = true;
1183	} catch (Exception ex) {
1184	println("no");
1185	result[0] = false;
1186
1187	if (m_Debug) {
1188	println("\n=== Full Report ===");
1189	print("Problem during building");
1190	println(": " + ex.getMessage() + "\n");
1191	println("Here is the dataset:\n");
1192	println("=== Train Dataset ===\n"
1193	+ train.toString() + "\n");
1194	}
1195	}
1196
1197	return result;
1198	}
1199
1200	/**
1201	* Runs a text on the datasets with the given characteristics.
1202	*
1203	* @param nominalPredictor if true use nominal predictor attributes
1204	* @param numericPredictor if true use numeric predictor attributes
1205	* @param stringPredictor if true use string predictor attributes
1206	* @param datePredictor if true use date predictor attributes
1207	* @param relationalPredictor if true use relational predictor attributes
1208	* @param multiInstance whether multi-instance is needed
1209	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1210	* @param missingLevel the percentage of missing values
1211	* @param predictorMissing true if the missing values may be in
1212	* the predictors
1213	* @param classMissing true if the missing values may be in the class
1214	* @param numTrain the number of instances in the training set
1215	* @param numClasses the number of classes
1216	* @param accepts the acceptable string in an exception
1217	* @return index 0 is true if the test was passed, index 1 is true if test
1218	* was acceptable
1219	*/
1220	protected boolean[] runBasicTest(boolean nominalPredictor,
1221	boolean numericPredictor,
1222	boolean stringPredictor,
1223	boolean datePredictor,
1224	boolean relationalPredictor,
1225	boolean multiInstance,
1226	int classType,
1227	int missingLevel,
1228	boolean predictorMissing,
1229	boolean classMissing,
1230	int numTrain,
1231	int numClasses,
1232	FastVector accepts) {
1233
1234	return runBasicTest(
1235	nominalPredictor,
1236	numericPredictor,
1237	stringPredictor,
1238	datePredictor,
1239	relationalPredictor,
1240	multiInstance,
1241	classType,
1242	TestInstances.CLASS_IS_LAST,
1243	missingLevel,
1244	predictorMissing,
1245	classMissing,
1246	numTrain,
1247	numClasses,
1248	accepts);
1249	}
1250
1251	/**
1252	* Runs a text on the datasets with the given characteristics.
1253	*
1254	* @param nominalPredictor if true use nominal predictor attributes
1255	* @param numericPredictor if true use numeric predictor attributes
1256	* @param stringPredictor if true use string predictor attributes
1257	* @param datePredictor if true use date predictor attributes
1258	* @param relationalPredictor if true use relational predictor attributes
1259	* @param multiInstance whether multi-instance is needed
1260	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1261	* @param classIndex the attribute index of the class
1262	* @param missingLevel the percentage of missing values
1263	* @param predictorMissing true if the missing values may be in
1264	* the predictors
1265	* @param classMissing true if the missing values may be in the class
1266	* @param numTrain the number of instances in the training set
1267	* @param numClasses the number of classes
1268	* @param accepts the acceptable string in an exception
1269	* @return index 0 is true if the test was passed, index 1 is true if test
1270	* was acceptable
1271	*/
1272	protected boolean[] runBasicTest(boolean nominalPredictor,
1273	boolean numericPredictor,
1274	boolean stringPredictor,
1275	boolean datePredictor,
1276	boolean relationalPredictor,
1277	boolean multiInstance,
1278	int classType,
1279	int classIndex,
1280	int missingLevel,
1281	boolean predictorMissing,
1282	boolean classMissing,
1283	int numTrain,
1284	int numClasses,
1285	FastVector accepts) {
1286
1287	boolean[] result = new boolean[2];
1288	Instances train = null;
1289	Associator associator = null;
1290	try {
1291	train = makeTestDataset(42, numTrain,
1292	nominalPredictor ? getNumNominal() : 0,
1293	numericPredictor ? getNumNumeric() : 0,
1294	stringPredictor ? getNumString() : 0,
1295	datePredictor ? getNumDate() : 0,
1296	relationalPredictor ? getNumRelational() : 0,
1297	numClasses,
1298	classType,
1299	classIndex,
1300	multiInstance);
1301	if (missingLevel > 0)
1302	addMissing(train, missingLevel, predictorMissing, classMissing);
1303	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
1304	} catch (Exception ex) {
1305	ex.printStackTrace();
1306	throw new Error("Error setting up for tests: " + ex.getMessage());
1307	}
1308	try {
1309	associator.buildAssociations(train);
1310	println("yes");
1311	result[0] = true;
1312	}
1313	catch (Exception ex) {
1314	boolean acceptable = false;
1315	String msg;
1316	if (ex.getMessage() == null)
1317	msg = "";
1318	else
1319	msg = ex.getMessage().toLowerCase();
1320	if (msg.indexOf("not in classpath") > -1)
1321	m_ClasspathProblems = true;
1322
1323	for (int i = 0; i < accepts.size(); i++) {
1324	if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
1325	acceptable = true;
1326	}
1327	}
1328
1329	println("no" + (acceptable ? " (OK error message)" : ""));
1330	result[1] = acceptable;
1331
1332	if (m_Debug) {
1333	println("\n=== Full Report ===");
1334	print("Problem during building");
1335	println(": " + ex.getMessage() + "\n");
1336	if (!acceptable) {
1337	if (accepts.size() > 0) {
1338	print("Error message doesn't mention ");
1339	for (int i = 0; i < accepts.size(); i++) {
1340	if (i != 0) {
1341	print(" or ");
1342	}
1343	print('"' + (String)accepts.elementAt(i) + '"');
1344	}
1345	}
1346	println("here is the dataset:\n");
1347	println("=== Train Dataset ===\n"
1348	+ train.toString() + "\n");
1349	}
1350	}
1351	}
1352
1353	return result;
1354	}
1355
1356	/**
1357	* Make a simple set of instances, which can later be modified
1358	* for use in specific tests.
1359	*
1360	* @param seed the random number seed
1361	* @param numInstances the number of instances to generate
1362	* @param numNominal the number of nominal attributes
1363	* @param numNumeric the number of numeric attributes
1364	* @param numString the number of string attributes
1365	* @param numDate the number of date attributes
1366	* @param numRelational the number of relational attributes
1367	* @param numClasses the number of classes (if nominal class)
1368	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1369	* @param multiInstance whether the dataset should a multi-instance dataset
1370	* @return the test dataset
1371	* @throws Exception if the dataset couldn't be generated
1372	* @see #process(Instances)
1373	*/
1374	protected Instances makeTestDataset(int seed, int numInstances,
1375	int numNominal, int numNumeric,
1376	int numString, int numDate,
1377	int numRelational,
1378	int numClasses, int classType,
1379	boolean multiInstance)
1380	throws Exception {
1381
1382	return makeTestDataset(
1383	seed,
1384	numInstances,
1385	numNominal,
1386	numNumeric,
1387	numString,
1388	numDate,
1389	numRelational,
1390	numClasses,
1391	classType,
1392	TestInstances.CLASS_IS_LAST,
1393	multiInstance);
1394	}
1395
1396	/**
1397	* Make a simple set of instances with variable position of the class
1398	* attribute, which can later be modified for use in specific tests.
1399	*
1400	* @param seed the random number seed
1401	* @param numInstances the number of instances to generate
1402	* @param numNominal the number of nominal attributes
1403	* @param numNumeric the number of numeric attributes
1404	* @param numString the number of string attributes
1405	* @param numDate the number of date attributes
1406	* @param numRelational the number of relational attributes
1407	* @param numClasses the number of classes (if nominal class)
1408	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1409	* @param classIndex the index of the class (0-based, -1 as last)
1410	* @param multiInstance whether the dataset should a multi-instance dataset
1411	* @return the test dataset
1412	* @throws Exception if the dataset couldn't be generated
1413	* @see TestInstances#CLASS_IS_LAST
1414	* @see #process(Instances)
1415	*/
1416	protected Instances makeTestDataset(int seed, int numInstances,
1417	int numNominal, int numNumeric,
1418	int numString, int numDate,
1419	int numRelational,
1420	int numClasses, int classType,
1421	int classIndex,
1422	boolean multiInstance)
1423	throws Exception {
1424
1425	TestInstances dataset = new TestInstances();
1426
1427	dataset.setSeed(seed);
1428	dataset.setNumInstances(numInstances);
1429	dataset.setNumNominal(numNominal);
1430	dataset.setNumNumeric(numNumeric);
1431	dataset.setNumString(numString);
1432	dataset.setNumDate(numDate);
1433	dataset.setNumRelational(numRelational);
1434	dataset.setNumClasses(numClasses);
1435	if (classType == NO_CLASS) {
1436	dataset.setClassType(Attribute.NOMINAL); // ignored
1437	dataset.setClassIndex(TestInstances.NO_CLASS);
1438	}
1439	else {
1440	dataset.setClassType(classType);
1441	dataset.setClassIndex(classIndex);
1442	}
1443	dataset.setNumClasses(numClasses);
1444	dataset.setMultiInstance(multiInstance);
1445	dataset.setWords(getWords());
1446	dataset.setWordSeparators(getWordSeparators());
1447
1448	return process(dataset.generate());
1449	}
1450
1451	/**
1452	* Print out a short summary string for the dataset characteristics
1453	*
1454	* @param nominalPredictor true if nominal predictor attributes are present
1455	* @param numericPredictor true if numeric predictor attributes are present
1456	* @param stringPredictor true if string predictor attributes are present
1457	* @param datePredictor true if date predictor attributes are present
1458	* @param relationalPredictor true if relational predictor attributes are present
1459	* @param multiInstance whether multi-instance is needed
1460	* @param classType the class type (NUMERIC, NOMINAL, etc.)
1461	*/
1462	protected void printAttributeSummary(boolean nominalPredictor,
1463	boolean numericPredictor,
1464	boolean stringPredictor,
1465	boolean datePredictor,
1466	boolean relationalPredictor,
1467	boolean multiInstance,
1468	int classType) {
1469
1470	String str = "";
1471
1472	if (numericPredictor)
1473	str += " numeric";
1474
1475	if (nominalPredictor) {
1476	if (str.length() > 0)
1477	str += " &";
1478	str += " nominal";
1479	}
1480
1481	if (stringPredictor) {
1482	if (str.length() > 0)
1483	str += " &";
1484	str += " string";
1485	}
1486
1487	if (datePredictor) {
1488	if (str.length() > 0)
1489	str += " &";
1490	str += " date";
1491	}
1492
1493	if (relationalPredictor) {
1494	if (str.length() > 0)
1495	str += " &";
1496	str += " relational";
1497	}
1498
1499	str += " predictors)";
1500
1501	switch (classType) {
1502	case Attribute.NUMERIC:
1503	str = " (numeric class," + str;
1504	break;
1505	case Attribute.NOMINAL:
1506	str = " (nominal class," + str;
1507	break;
1508	case Attribute.STRING:
1509	str = " (string class," + str;
1510	break;
1511	case Attribute.DATE:
1512	str = " (date class," + str;
1513	break;
1514	case Attribute.RELATIONAL:
1515	str = " (relational class," + str;
1516	break;
1517	case NO_CLASS:
1518	str = " (no class," + str;
1519	break;
1520	}
1521
1522	print(str);
1523	}
1524
1525	/**
1526	* Returns the revision string.
1527	*
1528	* @return the revision
1529	*/
1530	public String getRevision() {
1531	return RevisionUtils.extract("$Revision: 1.7 $");
1532	}
1533
1534	/**
1535	* Test method for this class
1536	*
1537	* @param args the commandline parameters
1538	*/
1539	public static void main(String [] args) {
1540	runCheck(new CheckAssociator(), args);
1541	}
1542	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: