Context Navigation

source: src/main/java/weka/associations/Apriori.java @ 25

Last change on this file since 25 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 48.2 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Apriori.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.associations;
24
25	import weka.core.AttributeStats;
26	import weka.core.Capabilities;
27	import weka.core.FastVector;
28	import weka.core.Instances;
29	import weka.core.Option;
30	import weka.core.OptionHandler;
31	import weka.core.RevisionUtils;
32	import weka.core.SelectedTag;
33	import weka.core.Tag;
34	import weka.core.TechnicalInformation;
35	import weka.core.TechnicalInformationHandler;
36	import weka.core.Utils;
37	import weka.core.Capabilities.Capability;
38	import weka.core.TechnicalInformation.Field;
39	import weka.core.TechnicalInformation.Type;
40	import weka.filters.Filter;
41	import weka.filters.unsupervised.attribute.Remove;
42
43	import java.util.Enumeration;
44	import java.util.Hashtable;
45
46	/**
47	<!-- globalinfo-start -->
48	* Class implementing an Apriori-type algorithm. Iteratively reduces the minimum support until it finds the required number of rules with the given minimum confidence.<br/>
49	* The algorithm has an option to mine class association rules. It is adapted as explained in the second reference.<br/>
50	* <br/>
51	* For more information see:<br/>
52	* <br/>
53	* R. Agrawal, R. Srikant: Fast Algorithms for Mining Association Rules in Large Databases. In: 20th International Conference on Very Large Data Bases, 478-499, 1994.<br/>
54	* <br/>
55	* Bing Liu, Wynne Hsu, Yiming Ma: Integrating Classification and Association Rule Mining. In: Fourth International Conference on Knowledge Discovery and Data Mining, 80-86, 1998.
56	* <p/>
57	<!-- globalinfo-end -->
58	*
59	<!-- technical-bibtex-start -->
60	* BibTeX:
61	* <pre>
62	* @inproceedings{Agrawal1994,
63	* author = {R. Agrawal and R. Srikant},
64	* booktitle = {20th International Conference on Very Large Data Bases},
65	* pages = {478-499},
66	* publisher = {Morgan Kaufmann, Los Altos, CA},
67	* title = {Fast Algorithms for Mining Association Rules in Large Databases},
68	* year = {1994}
69	* }
70	*
71	* @inproceedings{Liu1998,
72	* author = {Bing Liu and Wynne Hsu and Yiming Ma},
73	* booktitle = {Fourth International Conference on Knowledge Discovery and Data Mining},
74	* pages = {80-86},
75	* publisher = {AAAI Press},
76	* title = {Integrating Classification and Association Rule Mining},
77	* year = {1998}
78	* }
79	* </pre>
80	* <p/>
81	<!-- technical-bibtex-end -->
82	*
83	<!-- options-start -->
84	* Valid options are: <p/>
85	*
86	* <pre> -N <required number of rules output>
87	* The required number of rules. (default = 10)</pre>
88	*
89	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
90	* The metric type by which to rank rules. (default = confidence)</pre>
91	*
92	* <pre> -C <minimum metric score of a rule>
93	* The minimum confidence of a rule. (default = 0.9)</pre>
94	*
95	* <pre> -D <delta for minimum support>
96	* The delta by which the minimum support is decreased in
97	* each iteration. (default = 0.05)</pre>
98	*
99	* <pre> -U <upper bound for minimum support>
100	* Upper bound for minimum support. (default = 1.0)</pre>
101	*
102	* <pre> -M <lower bound for minimum support>
103	* The lower bound for the minimum support. (default = 0.1)</pre>
104	*
105	* <pre> -S <significance level>
106	* If used, rules are tested for significance at
107	* the given level. Slower. (default = no significance testing)</pre>
108	*
109	* <pre> -I
110	* If set the itemsets found are also output. (default = no)</pre>
111	*
112	* <pre> -R
113	* Remove columns that contain all missing values (default = no)</pre>
114	*
115	* <pre> -V
116	* Report progress iteratively. (default = no)</pre>
117	*
118	* <pre> -A
119	* If set class association rules are mined. (default = no)</pre>
120	*
121	* <pre> -c <the class index>
122	* The class index. (default = last)</pre>
123	*
124	<!-- options-end -->
125	*
126	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
127	* @author Mark Hall (mhall@cs.waikato.ac.nz)
128	* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
129	* @version $Revision: 5698 $
130	*/
131	public class Apriori
132	extends AbstractAssociator
133	implements OptionHandler, CARuleMiner, TechnicalInformationHandler {
134
135	/** for serialization */
136	static final long serialVersionUID = 3277498842319212687L;
137
138	/** The minimum support. */
139	protected double m_minSupport;
140
141	/** The upper bound on the support */
142	protected double m_upperBoundMinSupport;
143
144	/** The lower bound for the minimum support. */
145	protected double m_lowerBoundMinSupport;
146
147	/** Metric type: Confidence */
148	protected static final int CONFIDENCE = 0;
149	/** Metric type: Lift */
150	protected static final int LIFT = 1;
151	/** Metric type: Leverage */
152	protected static final int LEVERAGE = 2;
153	/** Metric type: Conviction */
154	protected static final int CONVICTION = 3;
155	/** Metric types. */
156	public static final Tag [] TAGS_SELECTION = {
157	new Tag(CONFIDENCE, "Confidence"),
158	new Tag(LIFT, "Lift"),
159	new Tag(LEVERAGE, "Leverage"),
160	new Tag(CONVICTION, "Conviction")
161	};
162
163	/** The selected metric type. */
164	protected int m_metricType = CONFIDENCE;
165
166	/** The minimum metric score. */
167	protected double m_minMetric;
168
169	/** The maximum number of rules that are output. */
170	protected int m_numRules;
171
172	/** Delta by which m_minSupport is decreased in each iteration. */
173	protected double m_delta;
174
175	/** Significance level for optional significance test. */
176	protected double m_significanceLevel;
177
178	/** Number of cycles used before required number of rules was one. */
179	protected int m_cycles;
180
181	/** The set of all sets of itemsets L. */
182	protected FastVector m_Ls;
183
184	/** The same information stored in hash tables. */
185	protected FastVector m_hashtables;
186
187	/** The list of all generated rules. */
188	protected FastVector[] m_allTheRules;
189
190	/** The instances (transactions) to be used for generating
191	the association rules. */
192	protected Instances m_instances;
193
194	/** Output itemsets found? */
195	protected boolean m_outputItemSets;
196
197	/** Remove columns with all missing values */
198	protected boolean m_removeMissingCols;
199
200	/** Report progress iteratively */
201	protected boolean m_verbose;
202
203	/** Only the class attribute of all Instances.*/
204	protected Instances m_onlyClass;
205
206	/** The class index. */
207	protected int m_classIndex;
208
209	/** Flag indicating whether class association rules are mined. */
210	protected boolean m_car;
211
212	/**
213	* Treat zeros as missing (rather than a value in their
214	* own right)
215	*/
216	protected boolean m_treatZeroAsMissing = false;
217
218	/**
219	* Returns a string describing this associator
220	* @return a description of the evaluator suitable for
221	* displaying in the explorer/experimenter gui
222	*/
223	public String globalInfo() {
224	return
225	"Class implementing an Apriori-type algorithm. Iteratively reduces "
226	+ "the minimum support until it finds the required number of rules with "
227	+ "the given minimum confidence.\n"
228	+ "The algorithm has an option to mine class association rules. It is "
229	+ "adapted as explained in the second reference.\n\n"
230	+ "For more information see:\n\n"
231	+ getTechnicalInformation().toString();
232	}
233
234	/**
235	* Returns an instance of a TechnicalInformation object, containing
236	* detailed information about the technical background of this class,
237	* e.g., paper reference or book this class is based on.
238	*
239	* @return the technical information about this class
240	*/
241	public TechnicalInformation getTechnicalInformation() {
242	TechnicalInformation result;
243	TechnicalInformation additional;
244
245	result = new TechnicalInformation(Type.INPROCEEDINGS);
246	result.setValue(Field.AUTHOR, "R. Agrawal and R. Srikant");
247	result.setValue(Field.TITLE, "Fast Algorithms for Mining Association Rules in Large Databases");
248	result.setValue(Field.BOOKTITLE, "20th International Conference on Very Large Data Bases");
249	result.setValue(Field.YEAR, "1994");
250	result.setValue(Field.PAGES, "478-499");
251	result.setValue(Field.PUBLISHER, "Morgan Kaufmann, Los Altos, CA");
252
253	additional = result.add(Type.INPROCEEDINGS);
254	additional.setValue(Field.AUTHOR, "Bing Liu and Wynne Hsu and Yiming Ma");
255	additional.setValue(Field.TITLE, "Integrating Classification and Association Rule Mining");
256	additional.setValue(Field.BOOKTITLE, "Fourth International Conference on Knowledge Discovery and Data Mining");
257	additional.setValue(Field.YEAR, "1998");
258	additional.setValue(Field.PAGES, "80-86");
259	additional.setValue(Field.PUBLISHER, "AAAI Press");
260
261	return result;
262	}
263
264	/**
265	* Constructor that allows to sets default values for the
266	* minimum confidence and the maximum number of rules
267	* the minimum confidence.
268	*/
269	public Apriori() {
270
271	resetOptions();
272	}
273
274	/**
275	* Resets the options to the default values.
276	*/
277	public void resetOptions() {
278
279	m_removeMissingCols = false;
280	m_verbose = false;
281	m_delta = 0.05;
282	m_minMetric = 0.90;
283	m_numRules = 10;
284	m_lowerBoundMinSupport = 0.1;
285	m_upperBoundMinSupport = 1.0;
286	m_significanceLevel = -1;
287	m_outputItemSets = false;
288	m_car = false;
289	m_classIndex = -1;
290	}
291
292	/**
293	* Removes columns that are all missing from the data
294	* @param instances the instances
295	* @return a new set of instances with all missing columns removed
296	* @throws Exception if something goes wrong
297	*/
298	protected Instances removeMissingColumns(Instances instances)
299	throws Exception {
300
301	int numInstances = instances.numInstances();
302	StringBuffer deleteString = new StringBuffer();
303	int removeCount = 0;
304	boolean first = true;
305	int maxCount = 0;
306
307	for (int i=0;i<instances.numAttributes();i++) {
308	AttributeStats as = instances.attributeStats(i);
309	if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
310	// see if we can decrease this by looking for the most frequent value
311	int [] counts = as.nominalCounts;
312	if (counts[Utils.maxIndex(counts)] > maxCount) {
313	maxCount = counts[Utils.maxIndex(counts)];
314	}
315	}
316	if (as.missingCount == numInstances) {
317	if (first) {
318	deleteString.append((i+1));
319	first = false;
320	} else {
321	deleteString.append(","+(i+1));
322	}
323	removeCount++;
324	}
325	}
326	if (m_verbose) {
327	System.err.println("Removed : "+removeCount+" columns with all missing "
328	+"values.");
329	}
330	if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
331	m_upperBoundMinSupport = (double)maxCount / (double)numInstances;
332	if (m_verbose) {
333	System.err.println("Setting upper bound min support to : "
334	+m_upperBoundMinSupport);
335	}
336	}
337
338	if (deleteString.toString().length() > 0) {
339	Remove af = new Remove();
340	af.setAttributeIndices(deleteString.toString());
341	af.setInvertSelection(false);
342	af.setInputFormat(instances);
343	Instances newInst = Filter.useFilter(instances, af);
344
345	return newInst;
346	}
347	return instances;
348	}
349
350	/**
351	* Returns default capabilities of the classifier.
352	*
353	* @return the capabilities of this classifier
354	*/
355	public Capabilities getCapabilities() {
356	Capabilities result = super.getCapabilities();
357	result.disableAll();
358
359	// enable what we can handle
360
361	// attributes
362	result.enable(Capability.NOMINAL_ATTRIBUTES);
363	result.enable(Capability.MISSING_VALUES);
364
365	// class (can handle a nominal class if CAR rules are selected). This
366	result.enable(Capability.NO_CLASS);
367	result.enable(Capability.NOMINAL_CLASS);
368	result.enable(Capability.MISSING_CLASS_VALUES);
369
370	return result;
371	}
372
373	/**
374	* Method that generates all large itemsets with a minimum support, and from
375	* these all association rules with a minimum confidence.
376	*
377	* @param instances the instances to be used for generating the associations
378	* @throws Exception if rules can't be built successfully
379	*/
380	public void buildAssociations(Instances instances) throws Exception {
381
382	double[] confidences, supports;
383	int[] indices;
384	FastVector[] sortedRuleSet;
385	double necSupport=0;
386
387	instances = new Instances(instances);
388
389	if (m_removeMissingCols) {
390	instances = removeMissingColumns(instances);
391	}
392	if(m_car && m_metricType != CONFIDENCE)
393	throw new Exception("For CAR-Mining metric type has to be confidence!");
394
395	// only set class index if CAR is requested
396	if (m_car) {
397	if (m_classIndex == -1 ) {
398	instances.setClassIndex(instances.numAttributes()-1);
399	} else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
400	instances.setClassIndex(m_classIndex - 1);
401	} else {
402	throw new Exception("Invalid class index.");
403	}
404	}
405
406	// can associator handle the data?
407	getCapabilities().testWithFail(instances);
408
409	m_cycles = 0;
410
411	// make sure that the lower bound is equal to at least one instance
412	double lowerBoundMinSupportToUse =
413	(m_lowerBoundMinSupport * (double)instances.numInstances() < 1.0)
414	? 1.0 / (double)instances.numInstances()
415	: m_lowerBoundMinSupport;
416
417	if(m_car){
418	//m_instances does not contain the class attribute
419	m_instances = LabeledItemSet.divide(instances,false);
420
421	//m_onlyClass contains only the class attribute
422	m_onlyClass = LabeledItemSet.divide(instances,true);
423	}
424	else
425	m_instances = instances;
426
427	if(m_car && m_numRules == Integer.MAX_VALUE){
428	// Set desired minimum support
429	m_minSupport = lowerBoundMinSupportToUse;
430	}
431	else{
432	// Decrease minimum support until desired number of rules found.
433	m_minSupport = m_upperBoundMinSupport - m_delta;
434	m_minSupport = (m_minSupport < lowerBoundMinSupportToUse)
435	? lowerBoundMinSupportToUse
436	: m_minSupport;
437	}
438
439	do {
440
441	// Reserve space for variables
442	m_Ls = new FastVector();
443	m_hashtables = new FastVector();
444	m_allTheRules = new FastVector[6];
445	m_allTheRules[0] = new FastVector();
446	m_allTheRules[1] = new FastVector();
447	m_allTheRules[2] = new FastVector();
448	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
449	m_allTheRules[3] = new FastVector();
450	m_allTheRules[4] = new FastVector();
451	m_allTheRules[5] = new FastVector();
452	}
453	sortedRuleSet = new FastVector[6];
454	sortedRuleSet[0] = new FastVector();
455	sortedRuleSet[1] = new FastVector();
456	sortedRuleSet[2] = new FastVector();
457	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
458	sortedRuleSet[3] = new FastVector();
459	sortedRuleSet[4] = new FastVector();
460	sortedRuleSet[5] = new FastVector();
461	}
462	if(!m_car){
463	// Find large itemsets and rules
464	findLargeItemSets();
465	if (m_significanceLevel != -1 \|\| m_metricType != CONFIDENCE)
466	findRulesBruteForce();
467	else
468	findRulesQuickly();
469	}
470	else{
471	findLargeCarItemSets();
472	findCarRulesQuickly();
473	}
474
475	// Sort rules according to their support
476	/*supports = new double[m_allTheRules[2].size()];
477	for (int i = 0; i < m_allTheRules[2].size(); i++)
478	supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
479	indices = Utils.stableSort(supports);
480	for (int i = 0; i < m_allTheRules[2].size(); i++) {
481	sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
482	sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
483	sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
484	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
485	sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
486	sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
487	sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
488	}
489	}*/
490	int j = m_allTheRules[2].size()-1;
491	supports = new double[m_allTheRules[2].size()];
492	for (int i = 0; i < (j+1); i++)
493	supports[j-i] = ((double)((ItemSet)m_allTheRules[1].elementAt(j-i)).support())*(-1);
494	indices = Utils.stableSort(supports);
495	for (int i = 0; i < (j+1); i++) {
496	sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j-i]));
497	sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j-i]));
498	sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j-i]));
499	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
500	sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j-i]));
501	sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j-i]));
502	sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j-i]));
503	}
504	}
505
506	// Sort rules according to their confidence
507	m_allTheRules[0].removeAllElements();
508	m_allTheRules[1].removeAllElements();
509	m_allTheRules[2].removeAllElements();
510	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
511	m_allTheRules[3].removeAllElements();
512	m_allTheRules[4].removeAllElements();
513	m_allTheRules[5].removeAllElements();
514	}
515	confidences = new double[sortedRuleSet[2].size()];
516	int sortType = 2 + m_metricType;
517
518	for (int i = 0; i < sortedRuleSet[2].size(); i++)
519	confidences[i] =
520	((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
521	indices = Utils.stableSort(confidences);
522	for (int i = sortedRuleSet[0].size() - 1;
523	(i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
524	m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
525	m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
526	m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
527	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
528	m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
529	m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
530	m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
531	}
532	}
533
534	if (m_verbose) {
535	if (m_Ls.size() > 1) {
536	System.out.println(toString());
537	}
538	}
539	if(m_minSupport == lowerBoundMinSupportToUse \|\| m_minSupport - m_delta > lowerBoundMinSupportToUse)
540	m_minSupport -= m_delta;
541	else
542	m_minSupport = lowerBoundMinSupportToUse;
543
544
545	necSupport = Math.rint(m_minSupport * (double)m_instances.numInstances());
546
547	m_cycles++;
548	} while ((m_allTheRules[0].size() < m_numRules) &&
549	(Utils.grOrEq(m_minSupport, lowerBoundMinSupportToUse))
550	/* (necSupport >= lowerBoundNumInstancesSupport)*/
551	/* (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ &&
552	(necSupport >= 1));
553	m_minSupport += m_delta;
554	}
555
556
557	/**
558	* Method that mines all class association rules with minimum support and
559	* with a minimum confidence.
560	* @return an sorted array of FastVector (confidence depended) containing the rules and metric information
561	* @param data the instances for which class association rules should be mined
562	* @throws Exception if rules can't be built successfully
563	*/
564	public FastVector[] mineCARs(Instances data) throws Exception{
565
566	m_car = true;
567	buildAssociations(data);
568	return m_allTheRules;
569	}
570
571	/**
572	* Gets the instances without the class atrribute.
573	*
574	* @return the instances without the class attribute.
575	*/
576	public Instances getInstancesNoClass() {
577
578	return m_instances;
579	}
580
581
582	/**
583	* Gets only the class attribute of the instances.
584	*
585	* @return the class attribute of all instances.
586	*/
587	public Instances getInstancesOnlyClass() {
588
589	return m_onlyClass;
590	}
591
592
593	/**
594	* Returns an enumeration describing the available options.
595	*
596	* @return an enumeration of all the available options.
597	*/
598	public Enumeration listOptions() {
599
600	String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
601	string2 =
602	"\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
603	string3 = "\tThe delta by which the minimum support is decreased in\n",
604	string4 = "\teach iteration. (default = " + m_delta + ")",
605	string5 =
606	"\tThe lower bound for the minimum support. (default = " +
607	m_lowerBoundMinSupport + ")",
608	string6 = "\tIf used, rules are tested for significance at\n",
609	string7 = "\tthe given level. Slower. (default = no significance testing)",
610	string8 = "\tIf set the itemsets found are also output. (default = no)",
611	string9 = "\tIf set class association rules are mined. (default = no)",
612	string10 = "\tThe class index. (default = last)",
613	stringType = "\tThe metric type by which to rank rules. (default = "
614	+"confidence)",
615	stringZeroAsMissing = "\tTreat zero (i.e. first value of nominal attributes) as " +
616	"missing";
617
618
619	FastVector newVector = new FastVector(11);
620
621	newVector.addElement(new Option(string1, "N", 1,
622	"-N <required number of rules output>"));
623	newVector.addElement(new Option(stringType, "T", 1,
624	"-T <0=confidence \| 1=lift \| "
625	+"2=leverage \| 3=Conviction>"));
626	newVector.addElement(new Option(string2, "C", 1,
627	"-C <minimum metric score of a rule>"));
628	newVector.addElement(new Option(string3 + string4, "D", 1,
629	"-D <delta for minimum support>"));
630	newVector.addElement(new Option("\tUpper bound for minimum support. "
631	+"(default = 1.0)", "U", 1,
632	"-U <upper bound for minimum support>"));
633	newVector.addElement(new Option(string5, "M", 1,
634	"-M <lower bound for minimum support>"));
635	newVector.addElement(new Option(string6 + string7, "S", 1,
636	"-S <significance level>"));
637	newVector.addElement(new Option(string8, "I", 0,
638	"-I"));
639	newVector.addElement(new Option("\tRemove columns that contain "
640	+"all missing values (default = no)"
641	, "R", 0,
642	"-R"));
643	newVector.addElement(new Option("\tReport progress iteratively. (default "
644	+"= no)", "V", 0,
645	"-V"));
646	newVector.addElement(new Option(string9, "A", 0,
647	"-A"));
648	newVector.addElement(new Option(stringZeroAsMissing, "Z", 0,
649	"-Z"));
650	newVector.addElement(new Option(string10, "c", 1,
651	"-c <the class index>"));
652
653	return newVector.elements();
654	}
655
656	/**
657	* Parses a given list of options. <p/>
658	*
659	<!-- options-start -->
660	* Valid options are: <p/>
661	*
662	* <pre> -N <required number of rules output>
663	* The required number of rules. (default = 10)</pre>
664	*
665	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
666	* The metric type by which to rank rules. (default = confidence)</pre>
667	*
668	* <pre> -C <minimum metric score of a rule>
669	* The minimum confidence of a rule. (default = 0.9)</pre>
670	*
671	* <pre> -D <delta for minimum support>
672	* The delta by which the minimum support is decreased in
673	* each iteration. (default = 0.05)</pre>
674	*
675	* <pre> -U <upper bound for minimum support>
676	* Upper bound for minimum support. (default = 1.0)</pre>
677	*
678	* <pre> -M <lower bound for minimum support>
679	* The lower bound for the minimum support. (default = 0.1)</pre>
680	*
681	* <pre> -S <significance level>
682	* If used, rules are tested for significance at
683	* the given level. Slower. (default = no significance testing)</pre>
684	*
685	* <pre> -I
686	* If set the itemsets found are also output. (default = no)</pre>
687	*
688	* <pre> -R
689	* Remove columns that contain all missing values (default = no)</pre>
690	*
691	* <pre> -V
692	* Report progress iteratively. (default = no)</pre>
693	*
694	* <pre> -A
695	* If set class association rules are mined. (default = no)</pre>
696	*
697	* <pre> -c <the class index>
698	* The class index. (default = last)</pre>
699	*
700	<!-- options-end -->
701	*
702	* @param options the list of options as an array of strings
703	* @throws Exception if an option is not supported
704	*/
705	public void setOptions(String[] options) throws Exception {
706
707	resetOptions();
708	String numRulesString = Utils.getOption('N', options),
709	minConfidenceString = Utils.getOption('C', options),
710	deltaString = Utils.getOption('D', options),
711	maxSupportString = Utils.getOption('U', options),
712	minSupportString = Utils.getOption('M', options),
713	significanceLevelString = Utils.getOption('S', options),
714	classIndexString = Utils.getOption('c',options);
715
716	String metricTypeString = Utils.getOption('T', options);
717	if (metricTypeString.length() != 0) {
718	setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
719	TAGS_SELECTION));
720	}
721
722	if (numRulesString.length() != 0) {
723	m_numRules = Integer.parseInt(numRulesString);
724	}
725	if (classIndexString.length() != 0) {
726	if (classIndexString.equalsIgnoreCase("last")) {
727	m_classIndex = -1;
728	} else if (classIndexString.equalsIgnoreCase("first")) {
729	m_classIndex = 0;
730	} else {
731	m_classIndex = Integer.parseInt(classIndexString);
732	}
733	}
734	if (minConfidenceString.length() != 0) {
735	m_minMetric = (new Double(minConfidenceString)).doubleValue();
736	}
737	if (deltaString.length() != 0) {
738	m_delta = (new Double(deltaString)).doubleValue();
739	}
740	if (maxSupportString.length() != 0) {
741	setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
742	}
743	if (minSupportString.length() != 0) {
744	m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
745	}
746	if (significanceLevelString.length() != 0) {
747	m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
748	}
749	m_outputItemSets = Utils.getFlag('I', options);
750	m_car = Utils.getFlag('A', options);
751	m_verbose = Utils.getFlag('V', options);
752	m_treatZeroAsMissing = Utils.getFlag('Z', options);
753
754	setRemoveAllMissingCols(Utils.getFlag('R', options));
755	}
756
757	/**
758	* Gets the current settings of the Apriori object.
759	*
760	* @return an array of strings suitable for passing to setOptions
761	*/
762	public String [] getOptions() {
763
764	String [] options = new String [21];
765	int current = 0;
766
767	if (m_outputItemSets) {
768	options[current++] = "-I";
769	}
770
771	if (getRemoveAllMissingCols()) {
772	options[current++] = "-R";
773	}
774
775	options[current++] = "-N"; options[current++] = "" + m_numRules;
776	options[current++] = "-T"; options[current++] = "" + m_metricType;
777	options[current++] = "-C"; options[current++] = "" + m_minMetric;
778	options[current++] = "-D"; options[current++] = "" + m_delta;
779	options[current++] = "-U"; options[current++] = "" + m_upperBoundMinSupport;
780	options[current++] = "-M"; options[current++] = "" + m_lowerBoundMinSupport;
781	options[current++] = "-S"; options[current++] = "" + m_significanceLevel;
782	if (m_car)
783	options[current++] = "-A";
784	if (m_verbose)
785	options[current++] = "-V";
786
787	if (m_treatZeroAsMissing) {
788	options[current++] = "-Z";
789	}
790	options[current++] = "-c"; options[current++] = "" + m_classIndex;
791
792	while (current < options.length) {
793	options[current++] = "";
794	}
795	return options;
796	}
797
798	/**
799	* Outputs the size of all the generated sets of itemsets and the rules.
800	*
801	* @return a string representation of the model
802	*/
803	public String toString() {
804
805	StringBuffer text = new StringBuffer();
806
807	if (m_Ls.size() <= 1)
808	return "\nNo large itemsets and rules found!\n";
809	text.append("\nApriori\n=======\n\n");
810	text.append("Minimum support: "
811	+ Utils.doubleToString(m_minSupport,2)
812	+ " (" + ((int)(m_minSupport * (double)m_instances.numInstances()+0.5))
813	+ " instances)"
814	+ '\n');
815	text.append("Minimum metric <");
816	switch(m_metricType) {
817	case CONFIDENCE:
818	text.append("confidence>: ");
819	break;
820	case LIFT:
821	text.append("lift>: ");
822	break;
823	case LEVERAGE:
824	text.append("leverage>: ");
825	break;
826	case CONVICTION:
827	text.append("conviction>: ");
828	break;
829	}
830	text.append(Utils.doubleToString(m_minMetric,2)+'\n');
831
832	if (m_significanceLevel != -1)
833	text.append("Significance level: "+
834	Utils.doubleToString(m_significanceLevel,2)+'\n');
835	text.append("Number of cycles performed: " + m_cycles+'\n');
836	text.append("\nGenerated sets of large itemsets:\n");
837	if(!m_car){
838	for (int i = 0; i < m_Ls.size(); i++) {
839	text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
840	((FastVector)m_Ls.elementAt(i)).size()+'\n');
841	if (m_outputItemSets) {
842	text.append("\nLarge Itemsets L("+(i+1)+"):\n");
843	for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
844	text.append(((AprioriItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
845	toString(m_instances)+"\n");
846	}
847	}
848	text.append("\nBest rules found:\n\n");
849	for (int i = 0; i < m_allTheRules[0].size(); i++) {
850	text.append(Utils.doubleToString((double)i+1,
851	(int)(Math.log(m_numRules)/Math.log(10)+1),0)+
852	". " + ((AprioriItemSet)m_allTheRules[0].elementAt(i)).
853	toString(m_instances)
854	+ " ==> " + ((AprioriItemSet)m_allTheRules[1].elementAt(i)).
855	toString(m_instances) +" conf:("+
856	Utils.doubleToString(((Double)m_allTheRules[2].
857	elementAt(i)).doubleValue(),2)+")");
858	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
859	text.append((m_metricType == LIFT ? " <" : "")+" lift:("+
860	Utils.doubleToString(((Double)m_allTheRules[3].
861	elementAt(i)).doubleValue(),2)
862	+")"+(m_metricType == LIFT ? ">" : ""));
863	text.append((m_metricType == LEVERAGE ? " <" : "")+" lev:("+
864	Utils.doubleToString(((Double)m_allTheRules[4].
865	elementAt(i)).doubleValue(),2)
866	+")");
867	text.append(" ["+
868	(int)(((Double)m_allTheRules[4].elementAt(i))
869	.doubleValue() * (double)m_instances.numInstances())
870	+"]"+(m_metricType == LEVERAGE ? ">" : ""));
871	text.append((m_metricType == CONVICTION ? " <" : "")+" conv:("+
872	Utils.doubleToString(((Double)m_allTheRules[5].
873	elementAt(i)).doubleValue(),2)
874	+")"+(m_metricType == CONVICTION ? ">" : ""));
875	}
876	text.append('\n');
877	}
878	}
879	else{
880	for (int i = 0; i < m_Ls.size(); i++) {
881	text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
882	((FastVector)m_Ls.elementAt(i)).size()+'\n');
883	if (m_outputItemSets) {
884	text.append("\nLarge Itemsets L("+(i+1)+"):\n");
885	for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++){
886	text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
887	toString(m_instances)+"\n");
888	text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).m_classLabel+" ");
889	text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).support()+"\n");
890	}
891	}
892	}
893	text.append("\nBest rules found:\n\n");
894	for (int i = 0; i < m_allTheRules[0].size(); i++) {
895	text.append(Utils.doubleToString((double)i+1,
896	(int)(Math.log(m_numRules)/Math.log(10)+1),0)+
897	". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
898	toString(m_instances)
899	+ " ==> " + ((ItemSet)m_allTheRules[1].elementAt(i)).
900	toString(m_onlyClass) +" conf:("+
901	Utils.doubleToString(((Double)m_allTheRules[2].
902	elementAt(i)).doubleValue(),2)+")");
903
904	text.append('\n');
905	}
906	}
907	return text.toString();
908	}
909
910	/**
911	* Returns the metric string for the chosen metric type
912	* @return a string describing the used metric for the interestingness of a class association rule
913	*/
914	public String metricString() {
915
916	switch(m_metricType) {
917	case LIFT:
918	return "lif";
919	case LEVERAGE:
920	return "leverage";
921	case CONVICTION:
922	return "conviction";
923	default:
924	return "conf";
925	}
926	}
927
928	/**
929	* Returns the tip text for this property
930	* @return tip text for this property suitable for
931	* displaying in the explorer/experimenter gui
932	*/
933	public String removeAllMissingColsTipText() {
934	return "Remove columns with all missing values.";
935	}
936
937	/**
938	* Remove columns containing all missing values.
939	* @param r true if cols are to be removed.
940	*/
941	public void setRemoveAllMissingCols(boolean r) {
942	m_removeMissingCols = r;
943	}
944
945	/**
946	* Returns whether columns containing all missing values are to be removed
947	* @return true if columns are to be removed.
948	*/
949	public boolean getRemoveAllMissingCols() {
950	return m_removeMissingCols;
951	}
952
953	/**
954	* Returns the tip text for this property
955	* @return tip text for this property suitable for
956	* displaying in the explorer/experimenter gui
957	*/
958	public String upperBoundMinSupportTipText() {
959	return "Upper bound for minimum support. Start iteratively decreasing "
960	+"minimum support from this value.";
961	}
962
963	/**
964	* Get the value of upperBoundMinSupport.
965	*
966	* @return Value of upperBoundMinSupport.
967	*/
968	public double getUpperBoundMinSupport() {
969
970	return m_upperBoundMinSupport;
971	}
972
973	/**
974	* Set the value of upperBoundMinSupport.
975	*
976	* @param v Value to assign to upperBoundMinSupport.
977	*/
978	public void setUpperBoundMinSupport(double v) {
979
980	m_upperBoundMinSupport = v;
981	}
982
983	/**
984	* Sets the class index
985	* @param index the class index
986	*/
987	public void setClassIndex(int index){
988
989	m_classIndex = index;
990	}
991
992	/**
993	* Gets the class index
994	* @return the index of the class attribute
995	*/
996	public int getClassIndex(){
997
998	return m_classIndex;
999	}
1000
1001	/**
1002	* Returns the tip text for this property
1003	* @return tip text for this property suitable for
1004	* displaying in the explorer/experimenter gui
1005	*/
1006	public String classIndexTipText() {
1007	return "Index of the class attribute. If set to -1, the last attribute is taken as class attribute.";
1008
1009	}
1010
1011	/**
1012	* Sets class association rule mining
1013	* @param flag if class association rules are mined, false otherwise
1014	*/
1015	public void setCar(boolean flag){
1016	m_car = flag;
1017	}
1018
1019	/**
1020	* Gets whether class association ruels are mined
1021	* @return true if class association rules are mined, false otherwise
1022	*/
1023	public boolean getCar(){
1024	return m_car;
1025	}
1026
1027	/**
1028	* Returns the tip text for this property
1029	* @return tip text for this property suitable for
1030	* displaying in the explorer/experimenter gui
1031	*/
1032	public String carTipText() {
1033	return "If enabled class association rules are mined instead of (general) association rules.";
1034	}
1035
1036	/**
1037	* Returns the tip text for this property
1038	* @return tip text for this property suitable for
1039	* displaying in the explorer/experimenter gui
1040	*/
1041	public String lowerBoundMinSupportTipText() {
1042	return "Lower bound for minimum support.";
1043	}
1044
1045	/**
1046	* Get the value of lowerBoundMinSupport.
1047	*
1048	* @return Value of lowerBoundMinSupport.
1049	*/
1050	public double getLowerBoundMinSupport() {
1051
1052	return m_lowerBoundMinSupport;
1053	}
1054
1055	/**
1056	* Set the value of lowerBoundMinSupport.
1057	*
1058	* @param v Value to assign to lowerBoundMinSupport.
1059	*/
1060	public void setLowerBoundMinSupport(double v) {
1061
1062	m_lowerBoundMinSupport = v;
1063	}
1064
1065	/**
1066	* Get the metric type
1067	*
1068	* @return the type of metric to use for ranking rules
1069	*/
1070	public SelectedTag getMetricType() {
1071	return new SelectedTag(m_metricType, TAGS_SELECTION);
1072	}
1073
1074	/**
1075	* Returns the tip text for this property
1076	* @return tip text for this property suitable for
1077	* displaying in the explorer/experimenter gui
1078	*/
1079	public String metricTypeTipText() {
1080	return "Set the type of metric by which to rank rules. Confidence is "
1081	+"the proportion of the examples covered by the premise that are also "
1082	+"covered by the consequence(Class association rules can only be mined using confidence). Lift is confidence divided by the "
1083	+"proportion of all examples that are covered by the consequence. This "
1084	+"is a measure of the importance of the association that is independent "
1085	+"of support. Leverage is the proportion of additional examples covered "
1086	+"by both the premise and consequence above those expected if the "
1087	+"premise and consequence were independent of each other. The total "
1088	+"number of examples that this represents is presented in brackets "
1089	+"following the leverage. Conviction is "
1090	+"another measure of departure from independence. Conviction is given "
1091	+"by ";
1092	}
1093
1094	/**
1095	* Set the metric type for ranking rules
1096	*
1097	* @param d the type of metric
1098	*/
1099	public void setMetricType (SelectedTag d) {
1100
1101	if (d.getTags() == TAGS_SELECTION) {
1102	m_metricType = d.getSelectedTag().getID();
1103	}
1104
1105	if (m_significanceLevel != -1 && m_metricType != CONFIDENCE) {
1106	m_metricType = CONFIDENCE;
1107	}
1108
1109	if (m_metricType == CONFIDENCE) {
1110	setMinMetric(0.9);
1111	}
1112
1113	if (m_metricType == LIFT \|\| m_metricType == CONVICTION) {
1114	setMinMetric(1.1);
1115	}
1116
1117	if (m_metricType == LEVERAGE) {
1118	setMinMetric(0.1);
1119	}
1120	}
1121
1122	/**
1123	* Returns the tip text for this property
1124	* @return tip text for this property suitable for
1125	* displaying in the explorer/experimenter gui
1126	*/
1127	public String minMetricTipText() {
1128	return "Minimum metric score. Consider only rules with scores higher than "
1129	+"this value.";
1130	}
1131
1132	/**
1133	* Get the value of minConfidence.
1134	*
1135	* @return Value of minConfidence.
1136	*/
1137	public double getMinMetric() {
1138
1139	return m_minMetric;
1140	}
1141
1142	/**
1143	* Set the value of minConfidence.
1144	*
1145	* @param v Value to assign to minConfidence.
1146	*/
1147	public void setMinMetric(double v) {
1148
1149	m_minMetric = v;
1150	}
1151
1152	/**
1153	* Returns the tip text for this property
1154	* @return tip text for this property suitable for
1155	* displaying in the explorer/experimenter gui
1156	*/
1157	public String numRulesTipText() {
1158	return "Number of rules to find.";
1159	}
1160
1161	/**
1162	* Get the value of numRules.
1163	*
1164	* @return Value of numRules.
1165	*/
1166	public int getNumRules() {
1167
1168	return m_numRules;
1169	}
1170
1171	/**
1172	* Set the value of numRules.
1173	*
1174	* @param v Value to assign to numRules.
1175	*/
1176	public void setNumRules(int v) {
1177
1178	m_numRules = v;
1179	}
1180
1181	/**
1182	* Returns the tip text for this property
1183	* @return tip text for this property suitable for
1184	* displaying in the explorer/experimenter gui
1185	*/
1186	public String deltaTipText() {
1187	return "Iteratively decrease support by this factor. Reduces support "
1188	+"until min support is reached or required number of rules has been "
1189	+"generated.";
1190	}
1191
1192	/**
1193	* Get the value of delta.
1194	*
1195	* @return Value of delta.
1196	*/
1197	public double getDelta() {
1198
1199	return m_delta;
1200	}
1201
1202	/**
1203	* Set the value of delta.
1204	*
1205	* @param v Value to assign to delta.
1206	*/
1207	public void setDelta(double v) {
1208
1209	m_delta = v;
1210	}
1211
1212	/**
1213	* Returns the tip text for this property
1214	* @return tip text for this property suitable for
1215	* displaying in the explorer/experimenter gui
1216	*/
1217	public String significanceLevelTipText() {
1218	return "Significance level. Significance test (confidence metric only).";
1219	}
1220
1221	/**
1222	* Get the value of significanceLevel.
1223	*
1224	* @return Value of significanceLevel.
1225	*/
1226	public double getSignificanceLevel() {
1227
1228	return m_significanceLevel;
1229	}
1230
1231	/**
1232	* Set the value of significanceLevel.
1233	*
1234	* @param v Value to assign to significanceLevel.
1235	*/
1236	public void setSignificanceLevel(double v) {
1237
1238	m_significanceLevel = v;
1239	}
1240
1241	/**
1242	* Sets whether itemsets are output as well
1243	* @param flag true if itemsets are to be output as well
1244	*/
1245	public void setOutputItemSets(boolean flag){
1246	m_outputItemSets = flag;
1247	}
1248
1249	/**
1250	* Gets whether itemsets are output as well
1251	* @return true if itemsets are output as well
1252	*/
1253	public boolean getOutputItemSets(){
1254	return m_outputItemSets;
1255	}
1256
1257	/**
1258	* Returns the tip text for this property
1259	* @return tip text for this property suitable for
1260	* displaying in the explorer/experimenter gui
1261	*/
1262	public String outputItemSetsTipText() {
1263	return "If enabled the itemsets are output as well.";
1264	}
1265
1266	/**
1267	* Sets verbose mode
1268	* @param flag true if algorithm should be run in verbose mode
1269	*/
1270	public void setVerbose(boolean flag){
1271	m_verbose = flag;
1272	}
1273
1274	/**
1275	* Gets whether algorithm is run in verbose mode
1276	* @return true if algorithm is run in verbose mode
1277	*/
1278	public boolean getVerbose(){
1279	return m_verbose;
1280	}
1281
1282	/**
1283	* Returns the tip text for this property
1284	* @return tip text for this property suitable for
1285	* displaying in the explorer/experimenter gui
1286	*/
1287	public String verboseTipText() {
1288	return "If enabled the algorithm will be run in verbose mode.";
1289	}
1290
1291	/**
1292	* Returns the tip text for this property
1293	* @return tip text for this property suitable for
1294	* displaying in the explorer/experimenter gui
1295	*/
1296	public String treatZeroAsMissingTipText() {
1297	return "If enabled, zero (that is, the first value of a nominal) is "
1298	+ "treated in the same way as a missing value.";
1299	}
1300
1301	/**
1302	* Sets whether zeros (i.e. the first value of a nominal attribute)
1303	* should be treated as missing values.
1304	*
1305	* @param z true if zeros should be treated as missing values.
1306	*/
1307	public void setTreatZeroAsMissing(boolean z) {
1308	m_treatZeroAsMissing = z;
1309	}
1310
1311	/**
1312	* Gets whether zeros (i.e. the first value of a nominal attribute)
1313	* is to be treated int he same way as missing values.
1314	*
1315	* @return true if zeros are to be treated like missing values.
1316	*/
1317	public boolean getTreatZeroAsMissing() {
1318	return m_treatZeroAsMissing;
1319	}
1320
1321	/**
1322	* Method that finds all large itemsets for the given set of instances.
1323	*
1324	* @throws Exception if an attribute is numeric
1325	*/
1326	private void findLargeItemSets() throws Exception {
1327
1328	FastVector kMinusOneSets, kSets;
1329	Hashtable hashtable;
1330	int necSupport, necMaxSupport,i = 0;
1331
1332
1333
1334	// Find large itemsets
1335
1336	// minimum support
1337	necSupport = (int)(m_minSupport * (double)m_instances.numInstances()+0.5);
1338	necMaxSupport = (int)(m_upperBoundMinSupport * (double)m_instances.numInstances()+0.5);
1339
1340	kSets = AprioriItemSet.singletons(m_instances, m_treatZeroAsMissing);
1341	AprioriItemSet.upDateCounters(kSets,m_instances);
1342	kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1343	if (kSets.size() == 0)
1344	return;
1345	do {
1346	m_Ls.addElement(kSets);
1347	kMinusOneSets = kSets;
1348	kSets = AprioriItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
1349	hashtable = AprioriItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
1350	m_hashtables.addElement(hashtable);
1351	kSets = AprioriItemSet.pruneItemSets(kSets, hashtable);
1352	AprioriItemSet.upDateCounters(kSets, m_instances);
1353	kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1354	i++;
1355	} while (kSets.size() > 0);
1356	}
1357
1358	/**
1359	* Method that finds all association rules and performs significance test.
1360	*
1361	* @throws Exception if an attribute is numeric
1362	*/
1363	private void findRulesBruteForce() throws Exception {
1364
1365	FastVector[] rules;
1366
1367	// Build rules
1368	for (int j = 1; j < m_Ls.size(); j++) {
1369	FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
1370	Enumeration enumItemSets = currentItemSets.elements();
1371	while (enumItemSets.hasMoreElements()) {
1372	AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
1373	//AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
1374	rules=currentItemSet.generateRulesBruteForce(m_minMetric,m_metricType,
1375	m_hashtables,j+1,
1376	m_instances.numInstances(),
1377	m_significanceLevel);
1378	for (int k = 0; k < rules[0].size(); k++) {
1379	m_allTheRules[0].addElement(rules[0].elementAt(k));
1380	m_allTheRules[1].addElement(rules[1].elementAt(k));
1381	m_allTheRules[2].addElement(rules[2].elementAt(k));
1382
1383	m_allTheRules[3].addElement(rules[3].elementAt(k));
1384	m_allTheRules[4].addElement(rules[4].elementAt(k));
1385	m_allTheRules[5].addElement(rules[5].elementAt(k));
1386	}
1387	}
1388	}
1389	}
1390
1391	/**
1392	* Method that finds all association rules.
1393	*
1394	* @throws Exception if an attribute is numeric
1395	*/
1396	private void findRulesQuickly() throws Exception {
1397
1398	FastVector[] rules;
1399
1400	// Build rules
1401	for (int j = 1; j < m_Ls.size(); j++) {
1402	FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
1403	Enumeration enumItemSets = currentItemSets.elements();
1404	while (enumItemSets.hasMoreElements()) {
1405	AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
1406	//AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
1407	rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
1408	for (int k = 0; k < rules[0].size(); k++) {
1409	m_allTheRules[0].addElement(rules[0].elementAt(k));
1410	m_allTheRules[1].addElement(rules[1].elementAt(k));
1411	m_allTheRules[2].addElement(rules[2].elementAt(k));
1412	}
1413	}
1414	}
1415	}
1416
1417	/**
1418	*
1419	* Method that finds all large itemsets for class association rules for the given set of instances.
1420	* @throws Exception if an attribute is numeric
1421	*/
1422	private void findLargeCarItemSets() throws Exception {
1423
1424	FastVector kMinusOneSets, kSets;
1425	Hashtable hashtable;
1426	int necSupport, necMaxSupport,i = 0;
1427
1428	// Find large itemsets
1429
1430	// minimum support
1431	double nextMinSupport = m_minSupport*(double)m_instances.numInstances();
1432	double nextMaxSupport = m_upperBoundMinSupport*(double)m_instances.numInstances();
1433	if((double)Math.rint(nextMinSupport) == nextMinSupport){
1434	necSupport = (int) nextMinSupport;
1435	}
1436	else{
1437	necSupport = Math.round((float)(nextMinSupport+0.5));
1438	}
1439	if((double)Math.rint(nextMaxSupport) == nextMaxSupport){
1440	necMaxSupport = (int) nextMaxSupport;
1441	}
1442	else{
1443	necMaxSupport = Math.round((float)(nextMaxSupport+0.5));
1444	}
1445
1446	//find item sets of length one
1447	kSets = LabeledItemSet.singletons(m_instances,m_onlyClass);
1448	LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
1449
1450	//check if a item set of lentgh one is frequent, if not delete it
1451	kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1452	if (kSets.size() == 0)
1453	return;
1454	do {
1455	m_Ls.addElement(kSets);
1456	kMinusOneSets = kSets;
1457	kSets = LabeledItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
1458	hashtable = LabeledItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
1459	kSets = LabeledItemSet.pruneItemSets(kSets, hashtable);
1460	LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
1461	kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1462	i++;
1463	} while (kSets.size() > 0);
1464	}
1465
1466
1467
1468	/**
1469	* Method that finds all class association rules.
1470	*
1471	* @throws Exception if an attribute is numeric
1472	*/
1473	private void findCarRulesQuickly() throws Exception {
1474
1475	FastVector[] rules;
1476
1477	// Build rules
1478	for (int j = 0; j < m_Ls.size(); j++) {
1479	FastVector currentLabeledItemSets = (FastVector)m_Ls.elementAt(j);
1480	Enumeration enumLabeledItemSets = currentLabeledItemSets.elements();
1481	while (enumLabeledItemSets.hasMoreElements()) {
1482	LabeledItemSet currentLabeledItemSet = (LabeledItemSet)enumLabeledItemSets.nextElement();
1483	rules = currentLabeledItemSet.generateRules(m_minMetric,false);
1484	for (int k = 0; k < rules[0].size(); k++) {
1485	m_allTheRules[0].addElement(rules[0].elementAt(k));
1486	m_allTheRules[1].addElement(rules[1].elementAt(k));
1487	m_allTheRules[2].addElement(rules[2].elementAt(k));
1488	}
1489	}
1490	}
1491	}
1492
1493	/**
1494	* returns all the rules
1495	*
1496	* @return all the rules
1497	* @see #m_allTheRules
1498	*/
1499	public FastVector[] getAllTheRules() {
1500	return m_allTheRules;
1501	}
1502
1503	/**
1504	* Returns the revision string.
1505	*
1506	* @return the revision
1507	*/
1508	public String getRevision() {
1509	return RevisionUtils.extract("$Revision: 5698 $");
1510	}
1511
1512	/**
1513	* Main method.
1514	*
1515	* @param args the commandline options
1516	*/
1517	public static void main(String[] args) {
1518	runAssociator(new Apriori(), args);
1519	}
1520	}
1521

Note: See TracBrowser for help on using the repository browser.

Download in other formats: