Context Navigation

CheckAssociator.java

Last change on this file was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 54.4 KB

Rev	Line
[29]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* CheckAssociator.java
	19	* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.associations;
	24
	25	import weka.core.Attribute;
	26	import weka.core.CheckScheme;
	27	import weka.core.FastVector;
	28	import weka.core.Instances;
	29	import weka.core.MultiInstanceCapabilitiesHandler;
	30	import weka.core.Option;
	31	import weka.core.OptionHandler;
	32	import weka.core.RevisionHandler;
	33	import weka.core.RevisionUtils;
	34	import weka.core.SerializationHelper;
	35	import weka.core.TestInstances;
	36	import weka.core.Utils;
	37	import weka.core.WeightedInstancesHandler;
	38
	39	import java.util.Enumeration;
	40	import java.util.Random;
	41	import java.util.Vector;
	42
	43	/**
	44	* Class for examining the capabilities and finding problems with
	45	* associators. If you implement an associator using the WEKA libraries,
	46	* you should run the checks on it to ensure robustness and correct
	47	* operation. Passing all the tests of this object does not mean
	48	* bugs in the associators don't exist, but this will help find some
	49	* common ones. <p/>
	50	*
	51	* Typical usage: <p/>
	52	* <code>java weka.associations.CheckAssociator -W associator_name
	53	* -- associator_options </code><p/>
	54	*
	55	* CheckAssociator reports on the following:
	56	* <ul>
	57	* <li> Associator abilities
	58	* <ul>
	59	* <li> Possible command line options to the associators </li>
	60	* <li> Whether the associators can predict nominal, numeric, string,
	61	* date or relational class attributes. </li>
	62	* <li> Whether the associators can handle numeric predictor attributes </li>
	63	* <li> Whether the associators can handle nominal predictor attributes </li>
	64	* <li> Whether the associators can handle string predictor attributes </li>
	65	* <li> Whether the associators can handle date predictor attributes </li>
	66	* <li> Whether the associators can handle relational predictor attributes </li>
	67	* <li> Whether the associators can handle multi-instance data </li>
	68	* <li> Whether the associators can handle missing predictor values </li>
	69	* <li> Whether the associators can handle missing class values </li>
	70	* <li> Whether a nominal associator only handles 2-class problems </li>
	71	* <li> Whether the associators can handle instance weights </li>
	72	* </ul>
	73	* </li>
	74	* <li> Correct functioning
	75	* <ul>
	76	* <li> Correct initialisation during buildAssociations (i.e. no result
	77	* changes when buildAssociations called repeatedly) </li>
	78	* <li> Whether the associator alters the data passed to it
	79	* (number of instances, instance order, instance weights, etc) </li>
	80	* </ul>
	81	* </li>
	82	* <li> Degenerate cases
	83	* <ul>
	84	* <li> building associators with zero training instances </li>
	85	* <li> all but one predictor attribute values missing </li>
	86	* <li> all predictor attribute values missing </li>
	87	* <li> all but one class values missing </li>
	88	* <li> all class values missing </li>
	89	* </ul>
	90	* </li>
	91	* </ul>
	92	* Running CheckAssociator with the debug option set will output the
	93	* training dataset for any failed tests.<p/>
	94	*
	95	* The <code>weka.associations.AbstractAssociatorTest</code> uses this
	96	* class to test all the associators. Any changes here, have to be
	97	* checked in that abstract test class, too. <p/>
	98	*
	99	<!-- options-start -->
	100	* Valid options are: <p/>
	101	*
	102	* <pre> -D
	103	* Turn on debugging output.</pre>
	104	*
	105	* <pre> -S
	106	* Silent mode - prints nothing to stdout.</pre>
	107	*
	108	* <pre> -N <num>
	109	* The number of instances in the datasets (default 20).</pre>
	110	*
	111	* <pre> -nominal <num>
	112	* The number of nominal attributes (default 2).</pre>
	113	*
	114	* <pre> -nominal-values <num>
	115	* The number of values for nominal attributes (default 1).</pre>
	116	*
	117	* <pre> -numeric <num>
	118	* The number of numeric attributes (default 1).</pre>
	119	*
	120	* <pre> -string <num>
	121	* The number of string attributes (default 1).</pre>
	122	*
	123	* <pre> -date <num>
	124	* The number of date attributes (default 1).</pre>
	125	*
	126	* <pre> -relational <num>
	127	* The number of relational attributes (default 1).</pre>
	128	*
	129	* <pre> -num-instances-relational <num>
	130	* The number of instances in relational/bag attributes (default 10).</pre>
	131	*
	132	* <pre> -words <comma-separated-list>
	133	* The words to use in string attributes.</pre>
	134	*
	135	* <pre> -word-separators <chars>
	136	* The word separators to use in string attributes.</pre>
	137	*
	138	* <pre> -W
	139	* Full name of the associator analysed.
	140	* eg: weka.associations.Apriori
	141	* (default weka.associations.Apriori)</pre>
	142	*
	143	* <pre>
	144	* Options specific to associator weka.associations.Apriori:
	145	* </pre>
	146	*
	147	* <pre> -N <required number of rules output>
	148	* The required number of rules. (default = 10)</pre>
	149	*
	150	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
	151	* The metric type by which to rank rules. (default = confidence)</pre>
	152	*
	153	* <pre> -C <minimum metric score of a rule>
	154	* The minimum confidence of a rule. (default = 0.9)</pre>
	155	*
	156	* <pre> -D <delta for minimum support>
	157	* The delta by which the minimum support is decreased in
	158	* each iteration. (default = 0.05)</pre>
	159	*
	160	* <pre> -U <upper bound for minimum support>
	161	* Upper bound for minimum support. (default = 1.0)</pre>
	162	*
	163	* <pre> -M <lower bound for minimum support>
	164	* The lower bound for the minimum support. (default = 0.1)</pre>
	165	*
	166	* <pre> -S <significance level>
	167	* If used, rules are tested for significance at
	168	* the given level. Slower. (default = no significance testing)</pre>
	169	*
	170	* <pre> -I
	171	* If set the itemsets found are also output. (default = no)</pre>
	172	*
	173	* <pre> -R
	174	* Remove columns that contain all missing values (default = no)</pre>
	175	*
	176	* <pre> -V
	177	* Report progress iteratively. (default = no)</pre>
	178	*
	179	* <pre> -A
	180	* If set class association rules are mined. (default = no)</pre>
	181	*
	182	* <pre> -c <the class index>
	183	* The class index. (default = last)</pre>
	184	*
	185	<!-- options-end -->
	186	*
	187	* Options after -- are passed to the designated associator.<p/>
	188	*
	189	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	190	* @author FracPete (fracpete at waikato dot ac dot nz)
	191	* @version $Revision: 1.7 $
	192	* @see TestInstances
	193	*/
	194	public class CheckAssociator
	195	extends CheckScheme
	196	implements RevisionHandler {
	197
	198	/*
	199	* Note about test methods:
	200	* - methods return array of booleans
	201	* - first index: success or not
	202	* - second index: acceptable or not (e.g., Exception is OK)
	203	*
	204	* FracPete (fracpete at waikato dot ac dot nz)
	205	*/
	206
	207	/** a "dummy" class type */
	208	public final static int NO_CLASS = -1;
	209
	210	/*** The associator to be examined */
	211	protected Associator m_Associator = new weka.associations.Apriori();
	212
	213	/**
	214	* Returns an enumeration describing the available options.
	215	*
	216	* @return an enumeration of all the available options.
	217	*/
	218	public Enumeration listOptions() {
	219	Vector result = new Vector();
	220
	221	Enumeration en = super.listOptions();
	222	while (en.hasMoreElements())
	223	result.addElement(en.nextElement());
	224
	225	result.addElement(new Option(
	226	"\tFull name of the associator analysed.\n"
	227	+"\teg: weka.associations.Apriori\n"
	228	+ "\t(default weka.associations.Apriori)",
	229	"W", 1, "-W"));
	230
	231	if ((m_Associator != null)
	232	&& (m_Associator instanceof OptionHandler)) {
	233	result.addElement(new Option("", "", 0,
	234	"\nOptions specific to associator "
	235	+ m_Associator.getClass().getName()
	236	+ ":"));
	237	Enumeration enu = ((OptionHandler)m_Associator).listOptions();
	238	while (enu.hasMoreElements())
	239	result.addElement(enu.nextElement());
	240	}
	241
	242	return result.elements();
	243	}
	244
	245	/**
	246	* Parses a given list of options.
	247	*
	248	<!-- options-start -->
	249	* Valid options are: <p/>
	250	*
	251	* <pre> -D
	252	* Turn on debugging output.</pre>
	253	*
	254	* <pre> -S
	255	* Silent mode - prints nothing to stdout.</pre>
	256	*
	257	* <pre> -N <num>
	258	* The number of instances in the datasets (default 20).</pre>
	259	*
	260	* <pre> -nominal <num>
	261	* The number of nominal attributes (default 2).</pre>
	262	*
	263	* <pre> -nominal-values <num>
	264	* The number of values for nominal attributes (default 1).</pre>
	265	*
	266	* <pre> -numeric <num>
	267	* The number of numeric attributes (default 1).</pre>
	268	*
	269	* <pre> -string <num>
	270	* The number of string attributes (default 1).</pre>
	271	*
	272	* <pre> -date <num>
	273	* The number of date attributes (default 1).</pre>
	274	*
	275	* <pre> -relational <num>
	276	* The number of relational attributes (default 1).</pre>
	277	*
	278	* <pre> -num-instances-relational <num>
	279	* The number of instances in relational/bag attributes (default 10).</pre>
	280	*
	281	* <pre> -words <comma-separated-list>
	282	* The words to use in string attributes.</pre>
	283	*
	284	* <pre> -word-separators <chars>
	285	* The word separators to use in string attributes.</pre>
	286	*
	287	* <pre> -W
	288	* Full name of the associator analysed.
	289	* eg: weka.associations.Apriori
	290	* (default weka.associations.Apriori)</pre>
	291	*
	292	* <pre>
	293	* Options specific to associator weka.associations.Apriori:
	294	* </pre>
	295	*
	296	* <pre> -N <required number of rules output>
	297	* The required number of rules. (default = 10)</pre>
	298	*
	299	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
	300	* The metric type by which to rank rules. (default = confidence)</pre>
	301	*
	302	* <pre> -C <minimum metric score of a rule>
	303	* The minimum confidence of a rule. (default = 0.9)</pre>
	304	*
	305	* <pre> -D <delta for minimum support>
	306	* The delta by which the minimum support is decreased in
	307	* each iteration. (default = 0.05)</pre>
	308	*
	309	* <pre> -U <upper bound for minimum support>
	310	* Upper bound for minimum support. (default = 1.0)</pre>
	311	*
	312	* <pre> -M <lower bound for minimum support>
	313	* The lower bound for the minimum support. (default = 0.1)</pre>
	314	*
	315	* <pre> -S <significance level>
	316	* If used, rules are tested for significance at
	317	* the given level. Slower. (default = no significance testing)</pre>
	318	*
	319	* <pre> -I
	320	* If set the itemsets found are also output. (default = no)</pre>
	321	*
	322	* <pre> -R
	323	* Remove columns that contain all missing values (default = no)</pre>
	324	*
	325	* <pre> -V
	326	* Report progress iteratively. (default = no)</pre>
	327	*
	328	* <pre> -A
	329	* If set class association rules are mined. (default = no)</pre>
	330	*
	331	* <pre> -c <the class index>
	332	* The class index. (default = last)</pre>
	333	*
	334	<!-- options-end -->
	335	*
	336	* @param options the list of options as an array of strings
	337	* @throws Exception if an option is not supported
	338	*/
	339	public void setOptions(String[] options) throws Exception {
	340	String tmpStr;
	341
	342	super.setOptions(options);
	343
	344	tmpStr = Utils.getOption('W', options);
	345	if (tmpStr.length() == 0)
	346	tmpStr = weka.associations.Apriori.class.getName();
	347	setAssociator(
	348	(Associator) forName(
	349	"weka.associations",
	350	Associator.class,
	351	tmpStr,
	352	Utils.partitionOptions(options)));
	353	}
	354
	355	/**
	356	* Gets the current settings of the CheckAssociator.
	357	*
	358	* @return an array of strings suitable for passing to setOptions
	359	*/
	360	public String[] getOptions() {
	361	Vector result;
	362	String[] options;
	363	int i;
	364
	365	result = new Vector();
	366
	367	options = super.getOptions();
	368	for (i = 0; i < options.length; i++)
	369	result.add(options[i]);
	370
	371	if (getAssociator() != null) {
	372	result.add("-W");
	373	result.add(getAssociator().getClass().getName());
	374	}
	375
	376	if ((m_Associator != null) && (m_Associator instanceof OptionHandler))
	377	options = ((OptionHandler) m_Associator).getOptions();
	378	else
	379	options = new String[0];
	380
	381	if (options.length > 0) {
	382	result.add("--");
	383	for (i = 0; i < options.length; i++)
	384	result.add(options[i]);
	385	}
	386
	387	return (String[]) result.toArray(new String[result.size()]);
	388	}
	389
	390	/**
	391	* Begin the tests, reporting results to System.out
	392	*/
	393	public void doTests() {
	394
	395	if (getAssociator() == null) {
	396	println("\n=== No associator set ===");
	397	return;
	398	}
	399	println("\n=== Check on Associator: "
	400	+ getAssociator().getClass().getName()
	401	+ " ===\n");
	402
	403	// Start tests
	404	m_ClasspathProblems = false;
	405	println("--> Checking for interfaces");
	406	canTakeOptions();
	407	boolean weightedInstancesHandler = weightedInstancesHandler()[0];
	408	boolean multiInstanceHandler = multiInstanceHandler()[0];
	409	println("--> Associator tests");
	410	declaresSerialVersionUID();
	411	println("--> no class attribute");
	412	testsWithoutClass(weightedInstancesHandler, multiInstanceHandler);
	413	println("--> with class attribute");
	414	testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler, multiInstanceHandler);
	415	testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler, multiInstanceHandler);
	416	testsPerClassType(Attribute.DATE, weightedInstancesHandler, multiInstanceHandler);
	417	testsPerClassType(Attribute.STRING, weightedInstancesHandler, multiInstanceHandler);
	418	testsPerClassType(Attribute.RELATIONAL, weightedInstancesHandler, multiInstanceHandler);
	419	}
	420
	421	/**
	422	* Set the associator to test.
	423	*
	424	* @param newAssociator the Associator to use.
	425	*/
	426	public void setAssociator(Associator newAssociator) {
	427	m_Associator = newAssociator;
	428	}
	429
	430	/**
	431	* Get the associator being tested
	432	*
	433	* @return the associator being tested
	434	*/
	435	public Associator getAssociator() {
	436	return m_Associator;
	437	}
	438
	439	/**
	440	* Run a battery of tests for a given class attribute type
	441	*
	442	* @param classType true if the class attribute should be numeric
	443	* @param weighted true if the associator says it handles weights
	444	* @param multiInstance true if the associator is a multi-instance associator
	445	*/
	446	protected void testsPerClassType(int classType,
	447	boolean weighted,
	448	boolean multiInstance) {
	449
	450	boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0];
	451	boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0];
	452	boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0];
	453	boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0];
	454	boolean PRel;
	455	if (!multiInstance)
	456	PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0];
	457	else
	458	PRel = false;
	459
	460	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
	461	if (weighted)
	462	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	463
	464	if (classType == Attribute.NOMINAL)
	465	canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4);
	466
	467	if (!multiInstance) {
	468	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0);
	469	canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1);
	470	}
	471
	472	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	473	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
	474	multiInstance, classType,
	475	true, false, 20)[0];
	476	if (handleMissingPredictors)
	477	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100);
	478
	479	boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
	480	multiInstance, classType,
	481	false, true, 20)[0];
	482	if (handleMissingClass)
	483	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100);
	484
	485	correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
	486	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType,
	487	handleMissingPredictors, handleMissingClass);
	488	}
	489	}
	490
	491	/**
	492	* Run a battery of tests without a class
	493	*
	494	* @param weighted true if the associator says it handles weights
	495	* @param multiInstance true if the associator is a multi-instance associator
	496	*/
	497	protected void testsWithoutClass(boolean weighted,
	498	boolean multiInstance) {
	499
	500	boolean PNom = canPredict(true, false, false, false, false, multiInstance, NO_CLASS)[0];
	501	boolean PNum = canPredict(false, true, false, false, false, multiInstance, NO_CLASS)[0];
	502	boolean PStr = canPredict(false, false, true, false, false, multiInstance, NO_CLASS)[0];
	503	boolean PDat = canPredict(false, false, false, true, false, multiInstance, NO_CLASS)[0];
	504	boolean PRel;
	505	if (!multiInstance)
	506	PRel = canPredict(false, false, false, false, true, multiInstance, NO_CLASS)[0];
	507	else
	508	PRel = false;
	509
	510	if (PNom \|\| PNum \|\| PStr \|\| PDat \|\| PRel) {
	511	if (weighted)
	512	instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
	513
	514	canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
	515	boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
	516	multiInstance, NO_CLASS,
	517	true, false, 20)[0];
	518	if (handleMissingPredictors)
	519	canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS, true, false, 100);
	520
	521	correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS);
	522	datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, NO_CLASS,
	523	handleMissingPredictors, false);
	524	}
	525	}
	526
	527	/**
	528	* Checks whether the scheme can take command line options.
	529	*
	530	* @return index 0 is true if the associator can take options
	531	*/
	532	protected boolean[] canTakeOptions() {
	533
	534	boolean[] result = new boolean[2];
	535
	536	print("options...");
	537	if (m_Associator instanceof OptionHandler) {
	538	println("yes");
	539	if (m_Debug) {
	540	println("\n=== Full report ===");
	541	Enumeration enu = ((OptionHandler)m_Associator).listOptions();
	542	while (enu.hasMoreElements()) {
	543	Option option = (Option) enu.nextElement();
	544	print(option.synopsis() + "\n"
	545	+ option.description() + "\n");
	546	}
	547	println("\n");
	548	}
	549	result[0] = true;
	550	}
	551	else {
	552	println("no");
	553	result[0] = false;
	554	}
	555
	556	return result;
	557	}
	558
	559	/**
	560	* Checks whether the scheme says it can handle instance weights.
	561	*
	562	* @return true if the associator handles instance weights
	563	*/
	564	protected boolean[] weightedInstancesHandler() {
	565
	566	boolean[] result = new boolean[2];
	567
	568	print("weighted instances associator...");
	569	if (m_Associator instanceof WeightedInstancesHandler) {
	570	println("yes");
	571	result[0] = true;
	572	}
	573	else {
	574	println("no");
	575	result[0] = false;
	576	}
	577
	578	return result;
	579	}
	580
	581	/**
	582	* Checks whether the scheme handles multi-instance data.
	583	*
	584	* @return true if the associator handles multi-instance data
	585	*/
	586	protected boolean[] multiInstanceHandler() {
	587	boolean[] result = new boolean[2];
	588
	589	print("multi-instance associator...");
	590	if (m_Associator instanceof MultiInstanceCapabilitiesHandler) {
	591	println("yes");
	592	result[0] = true;
	593	}
	594	else {
	595	println("no");
	596	result[0] = false;
	597	}
	598
	599	return result;
	600	}
	601
	602	/**
	603	* tests for a serialVersionUID. Fails in case the scheme doesn't declare
	604	* a UID.
	605	*
	606	* @return index 0 is true if the scheme declares a UID
	607	*/
	608	protected boolean[] declaresSerialVersionUID() {
	609	boolean[] result = new boolean[2];
	610
	611	print("serialVersionUID...");
	612
	613	result[0] = !SerializationHelper.needsUID(m_Associator.getClass());
	614
	615	if (result[0])
	616	println("yes");
	617	else
	618	println("no");
	619
	620	return result;
	621	}
	622
	623	/**
	624	* Checks basic prediction of the scheme, for simple non-troublesome
	625	* datasets.
	626	*
	627	* @param nominalPredictor if true use nominal predictor attributes
	628	* @param numericPredictor if true use numeric predictor attributes
	629	* @param stringPredictor if true use string predictor attributes
	630	* @param datePredictor if true use date predictor attributes
	631	* @param relationalPredictor if true use relational predictor attributes
	632	* @param multiInstance whether multi-instance is needed
	633	* @param classType the class type (NOMINAL, NUMERIC, etc.)
	634	* @return index 0 is true if the test was passed, index 1 is true if test
	635	* was acceptable
	636	*/
	637	protected boolean[] canPredict(
	638	boolean nominalPredictor,
	639	boolean numericPredictor,
	640	boolean stringPredictor,
	641	boolean datePredictor,
	642	boolean relationalPredictor,
	643	boolean multiInstance,
	644	int classType) {
	645
	646	print("basic predict");
	647	printAttributeSummary(
	648	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	649	print("...");
	650	FastVector accepts = new FastVector();
	651	accepts.addElement("any");
	652	accepts.addElement("unary");
	653	accepts.addElement("binary");
	654	accepts.addElement("nominal");
	655	accepts.addElement("numeric");
	656	accepts.addElement("string");
	657	accepts.addElement("date");
	658	accepts.addElement("relational");
	659	accepts.addElement("multi-instance");
	660	accepts.addElement("not in classpath");
	661	int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
	662	boolean predictorMissing = false, classMissing = false;
	663
	664	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	665	datePredictor, relationalPredictor,
	666	multiInstance,
	667	classType,
	668	missingLevel, predictorMissing, classMissing,
	669	numTrain, numClasses,
	670	accepts);
	671	}
	672
	673	/**
	674	* Checks whether nominal schemes can handle more than two classes.
	675	* If a scheme is only designed for two-class problems it should
	676	* throw an appropriate exception for multi-class problems.
	677	*
	678	* @param nominalPredictor if true use nominal predictor attributes
	679	* @param numericPredictor if true use numeric predictor attributes
	680	* @param stringPredictor if true use string predictor attributes
	681	* @param datePredictor if true use date predictor attributes
	682	* @param relationalPredictor if true use relational predictor attributes
	683	* @param multiInstance whether multi-instance is needed
	684	* @param numClasses the number of classes to test
	685	* @return index 0 is true if the test was passed, index 1 is true if test
	686	* was acceptable
	687	*/
	688	protected boolean[] canHandleNClasses(
	689	boolean nominalPredictor,
	690	boolean numericPredictor,
	691	boolean stringPredictor,
	692	boolean datePredictor,
	693	boolean relationalPredictor,
	694	boolean multiInstance,
	695	int numClasses) {
	696
	697	print("more than two class problems");
	698	printAttributeSummary(
	699	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL);
	700	print("...");
	701	FastVector accepts = new FastVector();
	702	accepts.addElement("number");
	703	accepts.addElement("class");
	704	int numTrain = getNumInstances(), missingLevel = 0;
	705	boolean predictorMissing = false, classMissing = false;
	706
	707	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	708	datePredictor, relationalPredictor,
	709	multiInstance,
	710	Attribute.NOMINAL,
	711	missingLevel, predictorMissing, classMissing,
	712	numTrain, numClasses,
	713	accepts);
	714	}
	715
	716	/**
	717	* Checks whether the scheme can handle class attributes as Nth attribute.
	718	*
	719	* @param nominalPredictor if true use nominal predictor attributes
	720	* @param numericPredictor if true use numeric predictor attributes
	721	* @param stringPredictor if true use string predictor attributes
	722	* @param datePredictor if true use date predictor attributes
	723	* @param relationalPredictor if true use relational predictor attributes
	724	* @param multiInstance whether multi-instance is needed
	725	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	726	* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
	727	* @return index 0 is true if the test was passed, index 1 is true if test
	728	* was acceptable
	729	* @see TestInstances#CLASS_IS_LAST
	730	*/
	731	protected boolean[] canHandleClassAsNthAttribute(
	732	boolean nominalPredictor,
	733	boolean numericPredictor,
	734	boolean stringPredictor,
	735	boolean datePredictor,
	736	boolean relationalPredictor,
	737	boolean multiInstance,
	738	int classType,
	739	int classIndex) {
	740
	741	if (classIndex == TestInstances.CLASS_IS_LAST)
	742	print("class attribute as last attribute");
	743	else
	744	print("class attribute as " + (classIndex + 1) + ". attribute");
	745	printAttributeSummary(
	746	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	747	print("...");
	748	FastVector accepts = new FastVector();
	749	int numTrain = getNumInstances(), numClasses = 2,
	750	missingLevel = 0;
	751	boolean predictorMissing = false, classMissing = false;
	752
	753	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	754	datePredictor, relationalPredictor,
	755	multiInstance,
	756	classType,
	757	classIndex,
	758	missingLevel, predictorMissing, classMissing,
	759	numTrain, numClasses,
	760	accepts);
	761	}
	762
	763	/**
	764	* Checks whether the scheme can handle zero training instances.
	765	*
	766	* @param nominalPredictor if true use nominal predictor attributes
	767	* @param numericPredictor if true use numeric predictor attributes
	768	* @param stringPredictor if true use string predictor attributes
	769	* @param datePredictor if true use date predictor attributes
	770	* @param relationalPredictor if true use relational predictor attributes
	771	* @param multiInstance whether multi-instance is needed
	772	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	773	* @return index 0 is true if the test was passed, index 1 is true if test
	774	* was acceptable
	775	*/
	776	protected boolean[] canHandleZeroTraining(
	777	boolean nominalPredictor,
	778	boolean numericPredictor,
	779	boolean stringPredictor,
	780	boolean datePredictor,
	781	boolean relationalPredictor,
	782	boolean multiInstance,
	783	int classType) {
	784
	785	print("handle zero training instances");
	786	printAttributeSummary(
	787	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	788	print("...");
	789	FastVector accepts = new FastVector();
	790	accepts.addElement("train");
	791	accepts.addElement("value");
	792	int numTrain = 0, numClasses = 2, missingLevel = 0;
	793	boolean predictorMissing = false, classMissing = false;
	794
	795	return runBasicTest(
	796	nominalPredictor, numericPredictor, stringPredictor,
	797	datePredictor, relationalPredictor,
	798	multiInstance,
	799	classType,
	800	missingLevel, predictorMissing, classMissing,
	801	numTrain, numClasses,
	802	accepts);
	803	}
	804
	805	/**
	806	* Checks whether the scheme correctly initialises models when
	807	* buildAssociations is called. This test calls buildAssociations with
	808	* one training dataset. buildAssociations is then called on a training
	809	* set with different structure, and then again with the original training
	810	* set. If the equals method of the AssociatorEvaluation class returns
	811	* false, this is noted as incorrect build initialisation.
	812	*
	813	* @param nominalPredictor if true use nominal predictor attributes
	814	* @param numericPredictor if true use numeric predictor attributes
	815	* @param stringPredictor if true use string predictor attributes
	816	* @param datePredictor if true use date predictor attributes
	817	* @param relationalPredictor if true use relational predictor attributes
	818	* @param multiInstance whether multi-instance is needed
	819	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	820	* @return index 0 is true if the test was passed
	821	*/
	822	protected boolean[] correctBuildInitialisation(
	823	boolean nominalPredictor,
	824	boolean numericPredictor,
	825	boolean stringPredictor,
	826	boolean datePredictor,
	827	boolean relationalPredictor,
	828	boolean multiInstance,
	829	int classType) {
	830
	831	boolean[] result = new boolean[2];
	832
	833	print("correct initialisation during buildAssociations");
	834	printAttributeSummary(
	835	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	836	print("...");
	837	int numTrain = getNumInstances(),
	838	numClasses = 2, missingLevel = 0;
	839	boolean predictorMissing = false, classMissing = false;
	840
	841	Instances train1 = null;
	842	Instances train2 = null;
	843	Associator associator = null;
	844	AssociatorEvaluation evaluation1A = null;
	845	AssociatorEvaluation evaluation1B = null;
	846	AssociatorEvaluation evaluation2 = null;
	847	int stage = 0;
	848	try {
	849
	850	// Make two train sets with different numbers of attributes
	851	train1 = makeTestDataset(42, numTrain,
	852	nominalPredictor ? getNumNominal() : 0,
	853	numericPredictor ? getNumNumeric() : 0,
	854	stringPredictor ? getNumString() : 0,
	855	datePredictor ? getNumDate() : 0,
	856	relationalPredictor ? getNumRelational() : 0,
	857	numClasses,
	858	classType,
	859	multiInstance);
	860	train2 = makeTestDataset(84, numTrain,
	861	nominalPredictor ? getNumNominal() + 1 : 0,
	862	numericPredictor ? getNumNumeric() + 1 : 0,
	863	stringPredictor ? getNumString() + 1 : 0,
	864	datePredictor ? getNumDate() + 1 : 0,
	865	relationalPredictor ? getNumRelational() + 1 : 0,
	866	numClasses,
	867	classType,
	868	multiInstance);
	869	if (missingLevel > 0) {
	870	addMissing(train1, missingLevel, predictorMissing, classMissing);
	871	addMissing(train2, missingLevel, predictorMissing, classMissing);
	872	}
	873
	874	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
	875	evaluation1A = new AssociatorEvaluation();
	876	evaluation1B = new AssociatorEvaluation();
	877	evaluation2 = new AssociatorEvaluation();
	878	} catch (Exception ex) {
	879	throw new Error("Error setting up for tests: " + ex.getMessage());
	880	}
	881	try {
	882	stage = 0;
	883	evaluation1A.evaluate(associator, train1);
	884
	885	stage = 1;
	886	evaluation2.evaluate(associator, train2);
	887
	888	stage = 2;
	889	evaluation1B.evaluate(associator, train1);
	890
	891	stage = 3;
	892	if (!evaluation1A.equals(evaluation1B)) {
	893	if (m_Debug) {
	894	println("\n=== Full report ===\n"
	895	+ evaluation1A.toSummaryString("\nFirst buildAssociations()")
	896	+ "\n\n");
	897	println(
	898	evaluation1B.toSummaryString("\nSecond buildAssociations()")
	899	+ "\n\n");
	900	}
	901	throw new Exception("Results differ between buildAssociations calls");
	902	}
	903	println("yes");
	904	result[0] = true;
	905
	906	if (false && m_Debug) {
	907	println("\n=== Full report ===\n"
	908	+ evaluation1A.toSummaryString("\nFirst buildAssociations()")
	909	+ "\n\n");
	910	println(
	911	evaluation1B.toSummaryString("\nSecond buildAssociations()")
	912	+ "\n\n");
	913	}
	914	}
	915	catch (Exception ex) {
	916	println("no");
	917	result[0] = false;
	918
	919	if (m_Debug) {
	920	println("\n=== Full Report ===");
	921	print("Problem during building");
	922	switch (stage) {
	923	case 0:
	924	print(" of dataset 1");
	925	break;
	926	case 1:
	927	print(" of dataset 2");
	928	break;
	929	case 2:
	930	print(" of dataset 1 (2nd build)");
	931	break;
	932	case 3:
	933	print(", comparing results from builds of dataset 1");
	934	break;
	935	}
	936	println(": " + ex.getMessage() + "\n");
	937	println("here are the datasets:\n");
	938	println("=== Train1 Dataset ===\n"
	939	+ train1.toString() + "\n");
	940	println("=== Train2 Dataset ===\n"
	941	+ train2.toString() + "\n");
	942	}
	943	}
	944
	945	return result;
	946	}
	947
	948	/**
	949	* Checks basic missing value handling of the scheme. If the missing
	950	* values cause an exception to be thrown by the scheme, this will be
	951	* recorded.
	952	*
	953	* @param nominalPredictor if true use nominal predictor attributes
	954	* @param numericPredictor if true use numeric predictor attributes
	955	* @param stringPredictor if true use string predictor attributes
	956	* @param datePredictor if true use date predictor attributes
	957	* @param relationalPredictor if true use relational predictor attributes
	958	* @param multiInstance whether multi-instance is needed
	959	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	960	* @param predictorMissing true if the missing values may be in
	961	* the predictors
	962	* @param classMissing true if the missing values may be in the class
	963	* @param missingLevel the percentage of missing values
	964	* @return index 0 is true if the test was passed, index 1 is true if test
	965	* was acceptable
	966	*/
	967	protected boolean[] canHandleMissing(
	968	boolean nominalPredictor,
	969	boolean numericPredictor,
	970	boolean stringPredictor,
	971	boolean datePredictor,
	972	boolean relationalPredictor,
	973	boolean multiInstance,
	974	int classType,
	975	boolean predictorMissing,
	976	boolean classMissing,
	977	int missingLevel) {
	978
	979	if (missingLevel == 100)
	980	print("100% ");
	981	print("missing");
	982	if (predictorMissing) {
	983	print(" predictor");
	984	if (classMissing)
	985	print(" and");
	986	}
	987	if (classMissing)
	988	print(" class");
	989	print(" values");
	990	printAttributeSummary(
	991	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	992	print("...");
	993	FastVector accepts = new FastVector();
	994	accepts.addElement("missing");
	995	accepts.addElement("value");
	996	accepts.addElement("train");
	997	int numTrain = getNumInstances(), numClasses = 2;
	998
	999	return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
	1000	datePredictor, relationalPredictor,
	1001	multiInstance,
	1002	classType,
	1003	missingLevel, predictorMissing, classMissing,
	1004	numTrain, numClasses,
	1005	accepts);
	1006	}
	1007
	1008	/**
	1009	* Checks whether the associator can handle instance weights.
	1010	* This test compares the associator performance on two datasets
	1011	* that are identical except for the training weights. If the
	1012	* results change, then the associator must be using the weights. It
	1013	* may be possible to get a false positive from this test if the
	1014	* weight changes aren't significant enough to induce a change
	1015	* in associator performance (but the weights are chosen to minimize
	1016	* the likelihood of this).
	1017	*
	1018	* @param nominalPredictor if true use nominal predictor attributes
	1019	* @param numericPredictor if true use numeric predictor attributes
	1020	* @param stringPredictor if true use string predictor attributes
	1021	* @param datePredictor if true use date predictor attributes
	1022	* @param relationalPredictor if true use relational predictor attributes
	1023	* @param multiInstance whether multi-instance is needed
	1024	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1025	* @return index 0 true if the test was passed
	1026	*/
	1027	protected boolean[] instanceWeights(
	1028	boolean nominalPredictor,
	1029	boolean numericPredictor,
	1030	boolean stringPredictor,
	1031	boolean datePredictor,
	1032	boolean relationalPredictor,
	1033	boolean multiInstance,
	1034	int classType) {
	1035
	1036	print("associator uses instance weights");
	1037	printAttributeSummary(
	1038	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	1039	print("...");
	1040	int numTrain = 2*getNumInstances(),
	1041	numClasses = 2, missingLevel = 0;
	1042	boolean predictorMissing = false, classMissing = false;
	1043
	1044	boolean[] result = new boolean[2];
	1045	Instances train = null;
	1046	Associator [] associators = null;
	1047	AssociatorEvaluation evaluationB = null;
	1048	AssociatorEvaluation evaluationI = null;
	1049	boolean evalFail = false;
	1050	try {
	1051	train = makeTestDataset(42, numTrain,
	1052	nominalPredictor ? getNumNominal() + 1 : 0,
	1053	numericPredictor ? getNumNumeric() + 1 : 0,
	1054	stringPredictor ? getNumString() : 0,
	1055	datePredictor ? getNumDate() : 0,
	1056	relationalPredictor ? getNumRelational() : 0,
	1057	numClasses,
	1058	classType,
	1059	multiInstance);
	1060	if (missingLevel > 0)
	1061	addMissing(train, missingLevel, predictorMissing, classMissing);
	1062	associators = AbstractAssociator.makeCopies(getAssociator(), 2);
	1063	evaluationB = new AssociatorEvaluation();
	1064	evaluationI = new AssociatorEvaluation();
	1065	evaluationB.evaluate(associators[0], train);
	1066	} catch (Exception ex) {
	1067	throw new Error("Error setting up for tests: " + ex.getMessage());
	1068	}
	1069	try {
	1070
	1071	// Now modify instance weights and re-built/test
	1072	for (int i = 0; i < train.numInstances(); i++) {
	1073	train.instance(i).setWeight(0);
	1074	}
	1075	Random random = new Random(1);
	1076	for (int i = 0; i < train.numInstances() / 2; i++) {
	1077	int inst = Math.abs(random.nextInt()) % train.numInstances();
	1078	int weight = Math.abs(random.nextInt()) % 10 + 1;
	1079	train.instance(inst).setWeight(weight);
	1080	}
	1081	evaluationI.evaluate(associators[1], train);
	1082	if (evaluationB.equals(evaluationI)) {
	1083	// println("no");
	1084	evalFail = true;
	1085	throw new Exception("evalFail");
	1086	}
	1087
	1088	println("yes");
	1089	result[0] = true;
	1090	} catch (Exception ex) {
	1091	println("no");
	1092	result[0] = false;
	1093
	1094	if (m_Debug) {
	1095	println("\n=== Full Report ===");
	1096
	1097	if (evalFail) {
	1098	println("Results don't differ between non-weighted and "
	1099	+ "weighted instance models.");
	1100	println("Here are the results:\n");
	1101	println(evaluationB.toSummaryString("\nboth methods\n"));
	1102	} else {
	1103	print("Problem during building");
	1104	println(": " + ex.getMessage() + "\n");
	1105	}
	1106	println("Here is the dataset:\n");
	1107	println("=== Train Dataset ===\n"
	1108	+ train.toString() + "\n");
	1109	println("=== Train Weights ===\n");
	1110	for (int i = 0; i < train.numInstances(); i++) {
	1111	println(" " + (i + 1)
	1112	+ " " + train.instance(i).weight());
	1113	}
	1114	}
	1115	}
	1116
	1117	return result;
	1118	}
	1119
	1120	/**
	1121	* Checks whether the scheme alters the training dataset during
	1122	* building. If the scheme needs to modify the data it should take
	1123	* a copy of the training data. Currently checks for changes to header
	1124	* structure, number of instances, order of instances, instance weights.
	1125	*
	1126	* @param nominalPredictor if true use nominal predictor attributes
	1127	* @param numericPredictor if true use numeric predictor attributes
	1128	* @param stringPredictor if true use string predictor attributes
	1129	* @param datePredictor if true use date predictor attributes
	1130	* @param relationalPredictor if true use relational predictor attributes
	1131	* @param multiInstance whether multi-instance is needed
	1132	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1133	* @param predictorMissing true if we know the associator can handle
	1134	* (at least) moderate missing predictor values
	1135	* @param classMissing true if we know the associator can handle
	1136	* (at least) moderate missing class values
	1137	* @return index 0 is true if the test was passed
	1138	*/
	1139	protected boolean[] datasetIntegrity(
	1140	boolean nominalPredictor,
	1141	boolean numericPredictor,
	1142	boolean stringPredictor,
	1143	boolean datePredictor,
	1144	boolean relationalPredictor,
	1145	boolean multiInstance,
	1146	int classType,
	1147	boolean predictorMissing,
	1148	boolean classMissing) {
	1149
	1150	print("associator doesn't alter original datasets");
	1151	printAttributeSummary(
	1152	nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
	1153	print("...");
	1154	int numTrain = getNumInstances(),
	1155	numClasses = 2, missingLevel = 20;
	1156
	1157	boolean[] result = new boolean[2];
	1158	Instances train = null;
	1159	Associator associator = null;
	1160	try {
	1161	train = makeTestDataset(42, numTrain,
	1162	nominalPredictor ? getNumNominal() : 0,
	1163	numericPredictor ? getNumNumeric() : 0,
	1164	stringPredictor ? getNumString() : 0,
	1165	datePredictor ? getNumDate() : 0,
	1166	relationalPredictor ? getNumRelational() : 0,
	1167	numClasses,
	1168	classType,
	1169	multiInstance);
	1170	if (missingLevel > 0)
	1171	addMissing(train, missingLevel, predictorMissing, classMissing);
	1172	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
	1173	} catch (Exception ex) {
	1174	throw new Error("Error setting up for tests: " + ex.getMessage());
	1175	}
	1176	try {
	1177	Instances trainCopy = new Instances(train);
	1178	associator.buildAssociations(trainCopy);
	1179	compareDatasets(train, trainCopy);
	1180
	1181	println("yes");
	1182	result[0] = true;
	1183	} catch (Exception ex) {
	1184	println("no");
	1185	result[0] = false;
	1186
	1187	if (m_Debug) {
	1188	println("\n=== Full Report ===");
	1189	print("Problem during building");
	1190	println(": " + ex.getMessage() + "\n");
	1191	println("Here is the dataset:\n");
	1192	println("=== Train Dataset ===\n"
	1193	+ train.toString() + "\n");
	1194	}
	1195	}
	1196
	1197	return result;
	1198	}
	1199
	1200	/**
	1201	* Runs a text on the datasets with the given characteristics.
	1202	*
	1203	* @param nominalPredictor if true use nominal predictor attributes
	1204	* @param numericPredictor if true use numeric predictor attributes
	1205	* @param stringPredictor if true use string predictor attributes
	1206	* @param datePredictor if true use date predictor attributes
	1207	* @param relationalPredictor if true use relational predictor attributes
	1208	* @param multiInstance whether multi-instance is needed
	1209	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1210	* @param missingLevel the percentage of missing values
	1211	* @param predictorMissing true if the missing values may be in
	1212	* the predictors
	1213	* @param classMissing true if the missing values may be in the class
	1214	* @param numTrain the number of instances in the training set
	1215	* @param numClasses the number of classes
	1216	* @param accepts the acceptable string in an exception
	1217	* @return index 0 is true if the test was passed, index 1 is true if test
	1218	* was acceptable
	1219	*/
	1220	protected boolean[] runBasicTest(boolean nominalPredictor,
	1221	boolean numericPredictor,
	1222	boolean stringPredictor,
	1223	boolean datePredictor,
	1224	boolean relationalPredictor,
	1225	boolean multiInstance,
	1226	int classType,
	1227	int missingLevel,
	1228	boolean predictorMissing,
	1229	boolean classMissing,
	1230	int numTrain,
	1231	int numClasses,
	1232	FastVector accepts) {
	1233
	1234	return runBasicTest(
	1235	nominalPredictor,
	1236	numericPredictor,
	1237	stringPredictor,
	1238	datePredictor,
	1239	relationalPredictor,
	1240	multiInstance,
	1241	classType,
	1242	TestInstances.CLASS_IS_LAST,
	1243	missingLevel,
	1244	predictorMissing,
	1245	classMissing,
	1246	numTrain,
	1247	numClasses,
	1248	accepts);
	1249	}
	1250
	1251	/**
	1252	* Runs a text on the datasets with the given characteristics.
	1253	*
	1254	* @param nominalPredictor if true use nominal predictor attributes
	1255	* @param numericPredictor if true use numeric predictor attributes
	1256	* @param stringPredictor if true use string predictor attributes
	1257	* @param datePredictor if true use date predictor attributes
	1258	* @param relationalPredictor if true use relational predictor attributes
	1259	* @param multiInstance whether multi-instance is needed
	1260	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1261	* @param classIndex the attribute index of the class
	1262	* @param missingLevel the percentage of missing values
	1263	* @param predictorMissing true if the missing values may be in
	1264	* the predictors
	1265	* @param classMissing true if the missing values may be in the class
	1266	* @param numTrain the number of instances in the training set
	1267	* @param numClasses the number of classes
	1268	* @param accepts the acceptable string in an exception
	1269	* @return index 0 is true if the test was passed, index 1 is true if test
	1270	* was acceptable
	1271	*/
	1272	protected boolean[] runBasicTest(boolean nominalPredictor,
	1273	boolean numericPredictor,
	1274	boolean stringPredictor,
	1275	boolean datePredictor,
	1276	boolean relationalPredictor,
	1277	boolean multiInstance,
	1278	int classType,
	1279	int classIndex,
	1280	int missingLevel,
	1281	boolean predictorMissing,
	1282	boolean classMissing,
	1283	int numTrain,
	1284	int numClasses,
	1285	FastVector accepts) {
	1286
	1287	boolean[] result = new boolean[2];
	1288	Instances train = null;
	1289	Associator associator = null;
	1290	try {
	1291	train = makeTestDataset(42, numTrain,
	1292	nominalPredictor ? getNumNominal() : 0,
	1293	numericPredictor ? getNumNumeric() : 0,
	1294	stringPredictor ? getNumString() : 0,
	1295	datePredictor ? getNumDate() : 0,
	1296	relationalPredictor ? getNumRelational() : 0,
	1297	numClasses,
	1298	classType,
	1299	classIndex,
	1300	multiInstance);
	1301	if (missingLevel > 0)
	1302	addMissing(train, missingLevel, predictorMissing, classMissing);
	1303	associator = AbstractAssociator.makeCopies(getAssociator(), 1)[0];
	1304	} catch (Exception ex) {
	1305	ex.printStackTrace();
	1306	throw new Error("Error setting up for tests: " + ex.getMessage());
	1307	}
	1308	try {
	1309	associator.buildAssociations(train);
	1310	println("yes");
	1311	result[0] = true;
	1312	}
	1313	catch (Exception ex) {
	1314	boolean acceptable = false;
	1315	String msg;
	1316	if (ex.getMessage() == null)
	1317	msg = "";
	1318	else
	1319	msg = ex.getMessage().toLowerCase();
	1320	if (msg.indexOf("not in classpath") > -1)
	1321	m_ClasspathProblems = true;
	1322
	1323	for (int i = 0; i < accepts.size(); i++) {
	1324	if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
	1325	acceptable = true;
	1326	}
	1327	}
	1328
	1329	println("no" + (acceptable ? " (OK error message)" : ""));
	1330	result[1] = acceptable;
	1331
	1332	if (m_Debug) {
	1333	println("\n=== Full Report ===");
	1334	print("Problem during building");
	1335	println(": " + ex.getMessage() + "\n");
	1336	if (!acceptable) {
	1337	if (accepts.size() > 0) {
	1338	print("Error message doesn't mention ");
	1339	for (int i = 0; i < accepts.size(); i++) {
	1340	if (i != 0) {
	1341	print(" or ");
	1342	}
	1343	print('"' + (String)accepts.elementAt(i) + '"');
	1344	}
	1345	}
	1346	println("here is the dataset:\n");
	1347	println("=== Train Dataset ===\n"
	1348	+ train.toString() + "\n");
	1349	}
	1350	}
	1351	}
	1352
	1353	return result;
	1354	}
	1355
	1356	/**
	1357	* Make a simple set of instances, which can later be modified
	1358	* for use in specific tests.
	1359	*
	1360	* @param seed the random number seed
	1361	* @param numInstances the number of instances to generate
	1362	* @param numNominal the number of nominal attributes
	1363	* @param numNumeric the number of numeric attributes
	1364	* @param numString the number of string attributes
	1365	* @param numDate the number of date attributes
	1366	* @param numRelational the number of relational attributes
	1367	* @param numClasses the number of classes (if nominal class)
	1368	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1369	* @param multiInstance whether the dataset should a multi-instance dataset
	1370	* @return the test dataset
	1371	* @throws Exception if the dataset couldn't be generated
	1372	* @see #process(Instances)
	1373	*/
	1374	protected Instances makeTestDataset(int seed, int numInstances,
	1375	int numNominal, int numNumeric,
	1376	int numString, int numDate,
	1377	int numRelational,
	1378	int numClasses, int classType,
	1379	boolean multiInstance)
	1380	throws Exception {
	1381
	1382	return makeTestDataset(
	1383	seed,
	1384	numInstances,
	1385	numNominal,
	1386	numNumeric,
	1387	numString,
	1388	numDate,
	1389	numRelational,
	1390	numClasses,
	1391	classType,
	1392	TestInstances.CLASS_IS_LAST,
	1393	multiInstance);
	1394	}
	1395
	1396	/**
	1397	* Make a simple set of instances with variable position of the class
	1398	* attribute, which can later be modified for use in specific tests.
	1399	*
	1400	* @param seed the random number seed
	1401	* @param numInstances the number of instances to generate
	1402	* @param numNominal the number of nominal attributes
	1403	* @param numNumeric the number of numeric attributes
	1404	* @param numString the number of string attributes
	1405	* @param numDate the number of date attributes
	1406	* @param numRelational the number of relational attributes
	1407	* @param numClasses the number of classes (if nominal class)
	1408	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1409	* @param classIndex the index of the class (0-based, -1 as last)
	1410	* @param multiInstance whether the dataset should a multi-instance dataset
	1411	* @return the test dataset
	1412	* @throws Exception if the dataset couldn't be generated
	1413	* @see TestInstances#CLASS_IS_LAST
	1414	* @see #process(Instances)
	1415	*/
	1416	protected Instances makeTestDataset(int seed, int numInstances,
	1417	int numNominal, int numNumeric,
	1418	int numString, int numDate,
	1419	int numRelational,
	1420	int numClasses, int classType,
	1421	int classIndex,
	1422	boolean multiInstance)
	1423	throws Exception {
	1424
	1425	TestInstances dataset = new TestInstances();
	1426
	1427	dataset.setSeed(seed);
	1428	dataset.setNumInstances(numInstances);
	1429	dataset.setNumNominal(numNominal);
	1430	dataset.setNumNumeric(numNumeric);
	1431	dataset.setNumString(numString);
	1432	dataset.setNumDate(numDate);
	1433	dataset.setNumRelational(numRelational);
	1434	dataset.setNumClasses(numClasses);
	1435	if (classType == NO_CLASS) {
	1436	dataset.setClassType(Attribute.NOMINAL); // ignored
	1437	dataset.setClassIndex(TestInstances.NO_CLASS);
	1438	}
	1439	else {
	1440	dataset.setClassType(classType);
	1441	dataset.setClassIndex(classIndex);
	1442	}
	1443	dataset.setNumClasses(numClasses);
	1444	dataset.setMultiInstance(multiInstance);
	1445	dataset.setWords(getWords());
	1446	dataset.setWordSeparators(getWordSeparators());
	1447
	1448	return process(dataset.generate());
	1449	}
	1450
	1451	/**
	1452	* Print out a short summary string for the dataset characteristics
	1453	*
	1454	* @param nominalPredictor true if nominal predictor attributes are present
	1455	* @param numericPredictor true if numeric predictor attributes are present
	1456	* @param stringPredictor true if string predictor attributes are present
	1457	* @param datePredictor true if date predictor attributes are present
	1458	* @param relationalPredictor true if relational predictor attributes are present
	1459	* @param multiInstance whether multi-instance is needed
	1460	* @param classType the class type (NUMERIC, NOMINAL, etc.)
	1461	*/
	1462	protected void printAttributeSummary(boolean nominalPredictor,
	1463	boolean numericPredictor,
	1464	boolean stringPredictor,
	1465	boolean datePredictor,
	1466	boolean relationalPredictor,
	1467	boolean multiInstance,
	1468	int classType) {
	1469
	1470	String str = "";
	1471
	1472	if (numericPredictor)
	1473	str += " numeric";
	1474
	1475	if (nominalPredictor) {
	1476	if (str.length() > 0)
	1477	str += " &";
	1478	str += " nominal";
	1479	}
	1480
	1481	if (stringPredictor) {
	1482	if (str.length() > 0)
	1483	str += " &";
	1484	str += " string";
	1485	}
	1486
	1487	if (datePredictor) {
	1488	if (str.length() > 0)
	1489	str += " &";
	1490	str += " date";
	1491	}
	1492
	1493	if (relationalPredictor) {
	1494	if (str.length() > 0)
	1495	str += " &";
	1496	str += " relational";
	1497	}
	1498
	1499	str += " predictors)";
	1500
	1501	switch (classType) {
	1502	case Attribute.NUMERIC:
	1503	str = " (numeric class," + str;
	1504	break;
	1505	case Attribute.NOMINAL:
	1506	str = " (nominal class," + str;
	1507	break;
	1508	case Attribute.STRING:
	1509	str = " (string class," + str;
	1510	break;
	1511	case Attribute.DATE:
	1512	str = " (date class," + str;
	1513	break;
	1514	case Attribute.RELATIONAL:
	1515	str = " (relational class," + str;
	1516	break;
	1517	case NO_CLASS:
	1518	str = " (no class," + str;
	1519	break;
	1520	}
	1521
	1522	print(str);
	1523	}
	1524
	1525	/**
	1526	* Returns the revision string.
	1527	*
	1528	* @return the revision
	1529	*/
	1530	public String getRevision() {
	1531	return RevisionUtils.extract("$Revision: 1.7 $");
	1532	}
	1533
	1534	/**
	1535	* Test method for this class
	1536	*
	1537	* @param args the commandline parameters
	1538	*/
	1539	public static void main(String [] args) {
	1540	runCheck(new CheckAssociator(), args);
	1541	}
	1542	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/associations/CheckAssociator.java

Download in other formats: