Context Navigation

Apriori.java

Last change on this file was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 48.2 KB

Rev	Line
[29]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* Apriori.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.associations;
	24
	25	import weka.core.AttributeStats;
	26	import weka.core.Capabilities;
	27	import weka.core.FastVector;
	28	import weka.core.Instances;
	29	import weka.core.Option;
	30	import weka.core.OptionHandler;
	31	import weka.core.RevisionUtils;
	32	import weka.core.SelectedTag;
	33	import weka.core.Tag;
	34	import weka.core.TechnicalInformation;
	35	import weka.core.TechnicalInformationHandler;
	36	import weka.core.Utils;
	37	import weka.core.Capabilities.Capability;
	38	import weka.core.TechnicalInformation.Field;
	39	import weka.core.TechnicalInformation.Type;
	40	import weka.filters.Filter;
	41	import weka.filters.unsupervised.attribute.Remove;
	42
	43	import java.util.Enumeration;
	44	import java.util.Hashtable;
	45
	46	/**
	47	<!-- globalinfo-start -->
	48	* Class implementing an Apriori-type algorithm. Iteratively reduces the minimum support until it finds the required number of rules with the given minimum confidence.<br/>
	49	* The algorithm has an option to mine class association rules. It is adapted as explained in the second reference.<br/>
	50	* <br/>
	51	* For more information see:<br/>
	52	* <br/>
	53	* R. Agrawal, R. Srikant: Fast Algorithms for Mining Association Rules in Large Databases. In: 20th International Conference on Very Large Data Bases, 478-499, 1994.<br/>
	54	* <br/>
	55	* Bing Liu, Wynne Hsu, Yiming Ma: Integrating Classification and Association Rule Mining. In: Fourth International Conference on Knowledge Discovery and Data Mining, 80-86, 1998.
	56	* <p/>
	57	<!-- globalinfo-end -->
	58	*
	59	<!-- technical-bibtex-start -->
	60	* BibTeX:
	61	* <pre>
	62	* @inproceedings{Agrawal1994,
	63	* author = {R. Agrawal and R. Srikant},
	64	* booktitle = {20th International Conference on Very Large Data Bases},
	65	* pages = {478-499},
	66	* publisher = {Morgan Kaufmann, Los Altos, CA},
	67	* title = {Fast Algorithms for Mining Association Rules in Large Databases},
	68	* year = {1994}
	69	* }
	70	*
	71	* @inproceedings{Liu1998,
	72	* author = {Bing Liu and Wynne Hsu and Yiming Ma},
	73	* booktitle = {Fourth International Conference on Knowledge Discovery and Data Mining},
	74	* pages = {80-86},
	75	* publisher = {AAAI Press},
	76	* title = {Integrating Classification and Association Rule Mining},
	77	* year = {1998}
	78	* }
	79	* </pre>
	80	* <p/>
	81	<!-- technical-bibtex-end -->
	82	*
	83	<!-- options-start -->
	84	* Valid options are: <p/>
	85	*
	86	* <pre> -N <required number of rules output>
	87	* The required number of rules. (default = 10)</pre>
	88	*
	89	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
	90	* The metric type by which to rank rules. (default = confidence)</pre>
	91	*
	92	* <pre> -C <minimum metric score of a rule>
	93	* The minimum confidence of a rule. (default = 0.9)</pre>
	94	*
	95	* <pre> -D <delta for minimum support>
	96	* The delta by which the minimum support is decreased in
	97	* each iteration. (default = 0.05)</pre>
	98	*
	99	* <pre> -U <upper bound for minimum support>
	100	* Upper bound for minimum support. (default = 1.0)</pre>
	101	*
	102	* <pre> -M <lower bound for minimum support>
	103	* The lower bound for the minimum support. (default = 0.1)</pre>
	104	*
	105	* <pre> -S <significance level>
	106	* If used, rules are tested for significance at
	107	* the given level. Slower. (default = no significance testing)</pre>
	108	*
	109	* <pre> -I
	110	* If set the itemsets found are also output. (default = no)</pre>
	111	*
	112	* <pre> -R
	113	* Remove columns that contain all missing values (default = no)</pre>
	114	*
	115	* <pre> -V
	116	* Report progress iteratively. (default = no)</pre>
	117	*
	118	* <pre> -A
	119	* If set class association rules are mined. (default = no)</pre>
	120	*
	121	* <pre> -c <the class index>
	122	* The class index. (default = last)</pre>
	123	*
	124	<!-- options-end -->
	125	*
	126	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
	127	* @author Mark Hall (mhall@cs.waikato.ac.nz)
	128	* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
	129	* @version $Revision: 5698 $
	130	*/
	131	public class Apriori
	132	extends AbstractAssociator
	133	implements OptionHandler, CARuleMiner, TechnicalInformationHandler {
	134
	/** for serialization */
	static final long serialVersionUID = 3277498842319212687L;

	/** The minimum support currently in use; buildAssociations lowers it by m_delta each cycle. */
	protected double m_minSupport;

	/** The upper bound on the minimum support (the search starts one delta below it). */
	protected double m_upperBoundMinSupport;

	/** The lower bound for the minimum support. */
	protected double m_lowerBoundMinSupport;

	/** Metric type: Confidence */
	protected static final int CONFIDENCE = 0;
	/** Metric type: Lift */
	protected static final int LIFT = 1;
	/** Metric type: Leverage */
	protected static final int LEVERAGE = 2;
	/** Metric type: Conviction */
	protected static final int CONVICTION = 3;
	/** Metric types, as selectable tags (ids match the constants above). */
	public static final Tag [] TAGS_SELECTION = {
		new Tag(CONFIDENCE, "Confidence"),
		new Tag(LIFT, "Lift"),
		new Tag(LEVERAGE, "Leverage"),
		new Tag(CONVICTION, "Conviction")
	};

	/** The selected metric type (one of CONFIDENCE, LIFT, LEVERAGE, CONVICTION). */
	protected int m_metricType = CONFIDENCE;

	/** The minimum metric score. */
	protected double m_minMetric;

	/** The maximum number of rules that are output. */
	protected int m_numRules;

	/** Delta by which m_minSupport is decreased in each iteration. */
	protected double m_delta;

	/** Significance level for optional significance test (-1 means no test). */
	protected double m_significanceLevel;

	/** Number of mining cycles performed by the last call to buildAssociations. */
	protected int m_cycles;

	/** The set of all sets of itemsets L. */
	protected FastVector m_Ls;

	/** The same information stored in hash tables. */
	protected FastVector m_hashtables;

	/**
	 * The generated rules as parallel lists: [0] premises, [1] consequences,
	 * [2] confidence; [3] lift, [4] leverage and [5] conviction are only
	 * allocated when a non-confidence metric or a significance test is used.
	 */
	protected FastVector[] m_allTheRules;

	/** The instances (transactions) to be used for generating
	the association rules. */
	protected Instances m_instances;

	/** Output itemsets found? */
	protected boolean m_outputItemSets;

	/** Remove columns with all missing values */
	protected boolean m_removeMissingCols;

	/** Report progress iteratively */
	protected boolean m_verbose;

	/** Only the class attribute of all Instances.*/
	protected Instances m_onlyClass;

	/** The class index: -1 means last attribute, otherwise a 1-based attribute index. */
	protected int m_classIndex;

	/** Flag indicating whether class association rules are mined. */
	protected boolean m_car;

	/**
	 * Treat zeros as missing (rather than a value in their
	 * own right)
	 */
	protected boolean m_treatZeroAsMissing = false;
	217
	218	/**
	219	* Returns a string describing this associator
	220	* @return a description of the evaluator suitable for
	221	* displaying in the explorer/experimenter gui
	222	*/
	223	public String globalInfo() {
	224	return
	225	"Class implementing an Apriori-type algorithm. Iteratively reduces "
	226	+ "the minimum support until it finds the required number of rules with "
	227	+ "the given minimum confidence.\n"
	228	+ "The algorithm has an option to mine class association rules. It is "
	229	+ "adapted as explained in the second reference.\n\n"
	230	+ "For more information see:\n\n"
	231	+ getTechnicalInformation().toString();
	232	}
	233
	234	/**
	235	* Returns an instance of a TechnicalInformation object, containing
	236	* detailed information about the technical background of this class,
	237	* e.g., paper reference or book this class is based on.
	238	*
	239	* @return the technical information about this class
	240	*/
	241	public TechnicalInformation getTechnicalInformation() {
	242	TechnicalInformation result;
	243	TechnicalInformation additional;
	244
	245	result = new TechnicalInformation(Type.INPROCEEDINGS);
	246	result.setValue(Field.AUTHOR, "R. Agrawal and R. Srikant");
	247	result.setValue(Field.TITLE, "Fast Algorithms for Mining Association Rules in Large Databases");
	248	result.setValue(Field.BOOKTITLE, "20th International Conference on Very Large Data Bases");
	249	result.setValue(Field.YEAR, "1994");
	250	result.setValue(Field.PAGES, "478-499");
	251	result.setValue(Field.PUBLISHER, "Morgan Kaufmann, Los Altos, CA");
	252
	253	additional = result.add(Type.INPROCEEDINGS);
	254	additional.setValue(Field.AUTHOR, "Bing Liu and Wynne Hsu and Yiming Ma");
	255	additional.setValue(Field.TITLE, "Integrating Classification and Association Rule Mining");
	256	additional.setValue(Field.BOOKTITLE, "Fourth International Conference on Knowledge Discovery and Data Mining");
	257	additional.setValue(Field.YEAR, "1998");
	258	additional.setValue(Field.PAGES, "80-86");
	259	additional.setValue(Field.PUBLISHER, "AAAI Press");
	260
	261	return result;
	262	}
	263
	/**
	 * Creates an Apriori associator with all options set to their default
	 * values (see {@link #resetOptions()}).
	 */
	public Apriori() {
		this.resetOptions();
	}
	273
	274	/**
	275	* Resets the options to the default values.
	276	*/
	277	public void resetOptions() {
	278
	279	m_removeMissingCols = false;
	280	m_verbose = false;
	281	m_delta = 0.05;
	282	m_minMetric = 0.90;
	283	m_numRules = 10;
	284	m_lowerBoundMinSupport = 0.1;
	285	m_upperBoundMinSupport = 1.0;
	286	m_significanceLevel = -1;
	287	m_outputItemSets = false;
	288	m_car = false;
	289	m_classIndex = -1;
	290	}
	291
	292	/**
	293	* Removes columns that are all missing from the data
	294	* @param instances the instances
	295	* @return a new set of instances with all missing columns removed
	296	* @throws Exception if something goes wrong
	297	*/
	298	protected Instances removeMissingColumns(Instances instances)
	299	throws Exception {
	300
	301	int numInstances = instances.numInstances();
	302	StringBuffer deleteString = new StringBuffer();
	303	int removeCount = 0;
	304	boolean first = true;
	305	int maxCount = 0;
	306
	307	for (int i=0;i<instances.numAttributes();i++) {
	308	AttributeStats as = instances.attributeStats(i);
	309	if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
	310	// see if we can decrease this by looking for the most frequent value
	311	int [] counts = as.nominalCounts;
	312	if (counts[Utils.maxIndex(counts)] > maxCount) {
	313	maxCount = counts[Utils.maxIndex(counts)];
	314	}
	315	}
	316	if (as.missingCount == numInstances) {
	317	if (first) {
	318	deleteString.append((i+1));
	319	first = false;
	320	} else {
	321	deleteString.append(","+(i+1));
	322	}
	323	removeCount++;
	324	}
	325	}
	326	if (m_verbose) {
	327	System.err.println("Removed : "+removeCount+" columns with all missing "
	328	+"values.");
	329	}
	330	if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
	331	m_upperBoundMinSupport = (double)maxCount / (double)numInstances;
	332	if (m_verbose) {
	333	System.err.println("Setting upper bound min support to : "
	334	+m_upperBoundMinSupport);
	335	}
	336	}
	337
	338	if (deleteString.toString().length() > 0) {
	339	Remove af = new Remove();
	340	af.setAttributeIndices(deleteString.toString());
	341	af.setInvertSelection(false);
	342	af.setInputFormat(instances);
	343	Instances newInst = Filter.useFilter(instances, af);
	344
	345	return newInst;
	346	}
	347	return instances;
	348	}
	349
	350	/**
	351	* Returns default capabilities of the classifier.
	352	*
	353	* @return the capabilities of this classifier
	354	*/
	355	public Capabilities getCapabilities() {
	356	Capabilities result = super.getCapabilities();
	357	result.disableAll();
	358
	359	// enable what we can handle
	360
	361	// attributes
	362	result.enable(Capability.NOMINAL_ATTRIBUTES);
	363	result.enable(Capability.MISSING_VALUES);
	364
	365	// class (can handle a nominal class if CAR rules are selected). This
	366	result.enable(Capability.NO_CLASS);
	367	result.enable(Capability.NOMINAL_CLASS);
	368	result.enable(Capability.MISSING_CLASS_VALUES);
	369
	370	return result;
	371	}
	372
	/**
	 * Method that generates all large itemsets with a minimum support, and from
	 * these all association rules with a minimum confidence.
	 * <p>
	 * The minimum support starts one delta below the upper bound and is
	 * lowered by m_delta after every cycle until the required number of rules
	 * has been found or the lower bound has been passed. The rules kept in
	 * m_allTheRules are the best m_numRules rules of the last cycle, ordered
	 * by the selected metric (best first).
	 * <p>
	 * The given instances are copied; the caller's object is not modified.
	 *
	 * @param instances the instances to be used for generating the associations
	 * @throws Exception if rules can't be built successfully, if class
	 * association rules are requested with a metric other than confidence,
	 * if the class index is invalid, or if the data does not match the
	 * capabilities of this associator
	 */
	public void buildAssociations(Instances instances) throws Exception {

		double[] confidences, supports;
		int[] indices;
		FastVector[] sortedRuleSet;
		double necSupport=0;

		// work on a copy so that filtering / class index changes stay local
		instances = new Instances(instances);

		if (m_removeMissingCols) {
			instances = removeMissingColumns(instances);
		}
		if(m_car && m_metricType != CONFIDENCE)
			throw new Exception("For CAR-Mining metric type has to be confidence!");

		// only set class index if CAR is requested
		// (m_classIndex is -1 for "last", otherwise a 1-based attribute index)
		if (m_car) {
			if (m_classIndex == -1 ) {
				instances.setClassIndex(instances.numAttributes()-1);
			} else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
				instances.setClassIndex(m_classIndex - 1);
			} else {
				throw new Exception("Invalid class index.");
			}
		}

		// can associator handle the data?
		getCapabilities().testWithFail(instances);

		m_cycles = 0;

		// make sure that the lower bound is equal to at least one instance
		double lowerBoundMinSupportToUse =
			(m_lowerBoundMinSupport * (double)instances.numInstances() < 1.0)
			? 1.0 / (double)instances.numInstances()
			: m_lowerBoundMinSupport;

		if(m_car){
			//m_instances does not contain the class attribute
			m_instances = LabeledItemSet.divide(instances,false);

			//m_onlyClass contains only the class attribute
			m_onlyClass = LabeledItemSet.divide(instances,true);
		}
		else
			m_instances = instances;

		if(m_car && m_numRules == Integer.MAX_VALUE){
			// Set desired minimum support
			// (all rules are wanted, so mine directly at the lower bound)
			m_minSupport = lowerBoundMinSupportToUse;
		}
		else{
			// Decrease minimum support until desired number of rules found.
			// Start one delta below the upper bound, but never below the lower bound.
			m_minSupport = m_upperBoundMinSupport - m_delta;
			m_minSupport = (m_minSupport < lowerBoundMinSupportToUse)
				? lowerBoundMinSupportToUse
				: m_minSupport;
		}

		do {

			// Reserve space for variables
			// Slots 0-2: premise, consequence, confidence. Slots 3-5 (lift,
			// leverage, conviction) are only needed for the other metrics or
			// when significance testing is switched on.
			m_Ls = new FastVector();
			m_hashtables = new FastVector();
			m_allTheRules = new FastVector[6];
			m_allTheRules[0] = new FastVector();
			m_allTheRules[1] = new FastVector();
			m_allTheRules[2] = new FastVector();
			if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
				m_allTheRules[3] = new FastVector();
				m_allTheRules[4] = new FastVector();
				m_allTheRules[5] = new FastVector();
			}
			sortedRuleSet = new FastVector[6];
			sortedRuleSet[0] = new FastVector();
			sortedRuleSet[1] = new FastVector();
			sortedRuleSet[2] = new FastVector();
			if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
				sortedRuleSet[3] = new FastVector();
				sortedRuleSet[4] = new FastVector();
				sortedRuleSet[5] = new FastVector();
			}
			if(!m_car){
				// Find large itemsets and rules
				findLargeItemSets();
				if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
					findRulesBruteForce();
				else
					findRulesQuickly();
			}
			else{
				findLargeCarItemSets();
				findCarRulesQuickly();
			}

			// Sort rules according to their support.
			// The supports (taken from the consequence item sets) are negated
			// before the stable sort and the resulting index array is then
			// read back to front, so sortedRuleSet ends up in ascending order
			// of support. Compared with a plain ascending stable sort this
			// only changes how rules with equal support are ordered.
			int j = m_allTheRules[2].size()-1;
			supports = new double[m_allTheRules[2].size()];
			for (int i = 0; i < (j+1); i++)
				supports[j-i] = ((double)((ItemSet)m_allTheRules[1].elementAt(j-i)).support())*(-1);
			indices = Utils.stableSort(supports);
			for (int i = 0; i < (j+1); i++) {
				sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j-i]));
				sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j-i]));
				sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j-i]));
				if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
					sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j-i]));
					sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j-i]));
					sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j-i]));
				}
			}

			// Sort rules according to their confidence
			// (more precisely: according to the selected metric, see sortType)
			m_allTheRules[0].removeAllElements();
			m_allTheRules[1].removeAllElements();
			m_allTheRules[2].removeAllElements();
			if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
				m_allTheRules[3].removeAllElements();
				m_allTheRules[4].removeAllElements();
				m_allTheRules[5].removeAllElements();
			}
			confidences = new double[sortedRuleSet[2].size()];
			// metric ids 0..3 map onto rule slots 2..5
			int sortType = 2 + m_metricType;

			for (int i = 0; i < sortedRuleSet[2].size(); i++)
				confidences[i] =
					((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
			indices = Utils.stableSort(confidences);
			// Walk the ascending order from the end: best metric value first,
			// keeping at most m_numRules rules. Because the sort is stable,
			// ties are resolved in favour of the rule with the higher support.
			for (int i = sortedRuleSet[0].size() - 1;
				(i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
				m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
				m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
				m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
				if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
					m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
					m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
					m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
				}
			}

			if (m_verbose) {
				if (m_Ls.size() > 1) {
					System.out.println(toString());
				}
			}
			// Lower the support for the next cycle. When the next step would
			// fall below the lower bound, clamp to the bound instead so that
			// one cycle is run exactly at the bound; when we already are at
			// the bound, step below it, which ends the loop.
			if(m_minSupport == lowerBoundMinSupportToUse || m_minSupport - m_delta > lowerBoundMinSupportToUse)
				m_minSupport -= m_delta;
			else
				m_minSupport = lowerBoundMinSupportToUse;


			// support of the next cycle expressed as a number of instances
			necSupport = Math.rint(m_minSupport * (double)m_instances.numInstances());

			m_cycles++;
		} while ((m_allTheRules[0].size() < m_numRules) &&
			(Utils.grOrEq(m_minSupport, lowerBoundMinSupportToUse)) &&
			(necSupport >= 1));
		// Undo the last decrement so that m_minSupport reports the support of
		// the final cycle.
		// NOTE(review): if the last adjustment clamped to the lower bound
		// instead of subtracting m_delta, this overshoots the value actually
		// used - confirm whether that is intended.
		m_minSupport += m_delta;
	}
	555
	556
	557	/**
	558	* Method that mines all class association rules with minimum support and
	559	* with a minimum confidence.
	560	* @return an sorted array of FastVector (confidence depended) containing the rules and metric information
	561	* @param data the instances for which class association rules should be mined
	562	* @throws Exception if rules can't be built successfully
	563	*/
	564	public FastVector[] mineCARs(Instances data) throws Exception{
	565
	566	m_car = true;
	567	buildAssociations(data);
	568	return m_allTheRules;
	569	}
	570
	571	/**
	572	* Gets the instances without the class atrribute.
	573	*
	574	* @return the instances without the class attribute.
	575	*/
	576	public Instances getInstancesNoClass() {
	577
	578	return m_instances;
	579	}
	580
	581
	582	/**
	583	* Gets only the class attribute of the instances.
	584	*
	585	* @return the class attribute of all instances.
	586	*/
	587	public Instances getInstancesOnlyClass() {
	588
	589	return m_onlyClass;
	590	}
	591
	592
	593	/**
	594	* Returns an enumeration describing the available options.
	595	*
	596	* @return an enumeration of all the available options.
	597	*/
	598	public Enumeration listOptions() {
	599
	600	String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
	601	string2 =
	602	"\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
	603	string3 = "\tThe delta by which the minimum support is decreased in\n",
	604	string4 = "\teach iteration. (default = " + m_delta + ")",
	605	string5 =
	606	"\tThe lower bound for the minimum support. (default = " +
	607	m_lowerBoundMinSupport + ")",
	608	string6 = "\tIf used, rules are tested for significance at\n",
	609	string7 = "\tthe given level. Slower. (default = no significance testing)",
	610	string8 = "\tIf set the itemsets found are also output. (default = no)",
	611	string9 = "\tIf set class association rules are mined. (default = no)",
	612	string10 = "\tThe class index. (default = last)",
	613	stringType = "\tThe metric type by which to rank rules. (default = "
	614	+"confidence)",
	615	stringZeroAsMissing = "\tTreat zero (i.e. first value of nominal attributes) as " +
	616	"missing";
	617
	618
	619	FastVector newVector = new FastVector(11);
	620
	621	newVector.addElement(new Option(string1, "N", 1,
	622	"-N <required number of rules output>"));
	623	newVector.addElement(new Option(stringType, "T", 1,
	624	"-T <0=confidence \| 1=lift \| "
	625	+"2=leverage \| 3=Conviction>"));
	626	newVector.addElement(new Option(string2, "C", 1,
	627	"-C <minimum metric score of a rule>"));
	628	newVector.addElement(new Option(string3 + string4, "D", 1,
	629	"-D <delta for minimum support>"));
	630	newVector.addElement(new Option("\tUpper bound for minimum support. "
	631	+"(default = 1.0)", "U", 1,
	632	"-U <upper bound for minimum support>"));
	633	newVector.addElement(new Option(string5, "M", 1,
	634	"-M <lower bound for minimum support>"));
	635	newVector.addElement(new Option(string6 + string7, "S", 1,
	636	"-S <significance level>"));
	637	newVector.addElement(new Option(string8, "I", 0,
	638	"-I"));
	639	newVector.addElement(new Option("\tRemove columns that contain "
	640	+"all missing values (default = no)"
	641	, "R", 0,
	642	"-R"));
	643	newVector.addElement(new Option("\tReport progress iteratively. (default "
	644	+"= no)", "V", 0,
	645	"-V"));
	646	newVector.addElement(new Option(string9, "A", 0,
	647	"-A"));
	648	newVector.addElement(new Option(stringZeroAsMissing, "Z", 0,
	649	"-Z"));
	650	newVector.addElement(new Option(string10, "c", 1,
	651	"-c <the class index>"));
	652
	653	return newVector.elements();
	654	}
	655
	656	/**
	657	* Parses a given list of options. <p/>
	658	*
	659	<!-- options-start -->
	660	* Valid options are: <p/>
	661	*
	662	* <pre> -N <required number of rules output>
	663	* The required number of rules. (default = 10)</pre>
	664	*
	665	* <pre> -T <0=confidence \| 1=lift \| 2=leverage \| 3=Conviction>
	666	* The metric type by which to rank rules. (default = confidence)</pre>
	667	*
	668	* <pre> -C <minimum metric score of a rule>
	669	* The minimum confidence of a rule. (default = 0.9)</pre>
	670	*
	671	* <pre> -D <delta for minimum support>
	672	* The delta by which the minimum support is decreased in
	673	* each iteration. (default = 0.05)</pre>
	674	*
	675	* <pre> -U <upper bound for minimum support>
	676	* Upper bound for minimum support. (default = 1.0)</pre>
	677	*
	678	* <pre> -M <lower bound for minimum support>
	679	* The lower bound for the minimum support. (default = 0.1)</pre>
	680	*
	681	* <pre> -S <significance level>
	682	* If used, rules are tested for significance at
	683	* the given level. Slower. (default = no significance testing)</pre>
	684	*
	685	* <pre> -I
	686	* If set the itemsets found are also output. (default = no)</pre>
	687	*
	688	* <pre> -R
	689	* Remove columns that contain all missing values (default = no)</pre>
	690	*
	691	* <pre> -V
	692	* Report progress iteratively. (default = no)</pre>
	693	*
	694	* <pre> -A
	695	* If set class association rules are mined. (default = no)</pre>
	696	*
	697	* <pre> -c <the class index>
	698	* The class index. (default = last)</pre>
	699	*
	700	<!-- options-end -->
	701	*
	702	* @param options the list of options as an array of strings
	703	* @throws Exception if an option is not supported
	704	*/
	705	public void setOptions(String[] options) throws Exception {
	706
	707	resetOptions();
	708	String numRulesString = Utils.getOption('N', options),
	709	minConfidenceString = Utils.getOption('C', options),
	710	deltaString = Utils.getOption('D', options),
	711	maxSupportString = Utils.getOption('U', options),
	712	minSupportString = Utils.getOption('M', options),
	713	significanceLevelString = Utils.getOption('S', options),
	714	classIndexString = Utils.getOption('c',options);
	715
	716	String metricTypeString = Utils.getOption('T', options);
	717	if (metricTypeString.length() != 0) {
	718	setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
	719	TAGS_SELECTION));
	720	}
	721
	722	if (numRulesString.length() != 0) {
	723	m_numRules = Integer.parseInt(numRulesString);
	724	}
	725	if (classIndexString.length() != 0) {
	726	if (classIndexString.equalsIgnoreCase("last")) {
	727	m_classIndex = -1;
	728	} else if (classIndexString.equalsIgnoreCase("first")) {
	729	m_classIndex = 0;
	730	} else {
	731	m_classIndex = Integer.parseInt(classIndexString);
	732	}
	733	}
	734	if (minConfidenceString.length() != 0) {
	735	m_minMetric = (new Double(minConfidenceString)).doubleValue();
	736	}
	737	if (deltaString.length() != 0) {
	738	m_delta = (new Double(deltaString)).doubleValue();
	739	}
	740	if (maxSupportString.length() != 0) {
	741	setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
	742	}
	743	if (minSupportString.length() != 0) {
	744	m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
	745	}
	746	if (significanceLevelString.length() != 0) {
	747	m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
	748	}
	749	m_outputItemSets = Utils.getFlag('I', options);
	750	m_car = Utils.getFlag('A', options);
	751	m_verbose = Utils.getFlag('V', options);
	752	m_treatZeroAsMissing = Utils.getFlag('Z', options);
	753
	754	setRemoveAllMissingCols(Utils.getFlag('R', options));
	755	}
	756
	757	/**
	758	* Gets the current settings of the Apriori object.
	759	*
	760	* @return an array of strings suitable for passing to setOptions
	761	*/
	762	public String [] getOptions() {
	763
	764	String [] options = new String [21];
	765	int current = 0;
	766
	767	if (m_outputItemSets) {
	768	options[current++] = "-I";
	769	}
	770
	771	if (getRemoveAllMissingCols()) {
	772	options[current++] = "-R";
	773	}
	774
	775	options[current++] = "-N"; options[current++] = "" + m_numRules;
	776	options[current++] = "-T"; options[current++] = "" + m_metricType;
	777	options[current++] = "-C"; options[current++] = "" + m_minMetric;
	778	options[current++] = "-D"; options[current++] = "" + m_delta;
	779	options[current++] = "-U"; options[current++] = "" + m_upperBoundMinSupport;
	780	options[current++] = "-M"; options[current++] = "" + m_lowerBoundMinSupport;
	781	options[current++] = "-S"; options[current++] = "" + m_significanceLevel;
	782	if (m_car)
	783	options[current++] = "-A";
	784	if (m_verbose)
	785	options[current++] = "-V";
	786
	787	if (m_treatZeroAsMissing) {
	788	options[current++] = "-Z";
	789	}
	790	options[current++] = "-c"; options[current++] = "" + m_classIndex;
	791
	792	while (current < options.length) {
	793	options[current++] = "";
	794	}
	795	return options;
	796	}
	797
	798	/**
	799	* Outputs the size of all the generated sets of itemsets and the rules.
	800	*
	801	* @return a string representation of the model
	802	*/
	803	public String toString() {
	804
	805	StringBuffer text = new StringBuffer();
	806
	807	if (m_Ls.size() <= 1)
	808	return "\nNo large itemsets and rules found!\n";
	809	text.append("\nApriori\n=======\n\n");
	810	text.append("Minimum support: "
	811	+ Utils.doubleToString(m_minSupport,2)
	812	+ " (" + ((int)(m_minSupport * (double)m_instances.numInstances()+0.5))
	813	+ " instances)"
	814	+ '\n');
	815	text.append("Minimum metric <");
	816	switch(m_metricType) {
	817	case CONFIDENCE:
	818	text.append("confidence>: ");
	819	break;
	820	case LIFT:
	821	text.append("lift>: ");
	822	break;
	823	case LEVERAGE:
	824	text.append("leverage>: ");
	825	break;
	826	case CONVICTION:
	827	text.append("conviction>: ");
	828	break;
	829	}
	830	text.append(Utils.doubleToString(m_minMetric,2)+'\n');
	831
	832	if (m_significanceLevel != -1)
	833	text.append("Significance level: "+
	834	Utils.doubleToString(m_significanceLevel,2)+'\n');
	835	text.append("Number of cycles performed: " + m_cycles+'\n');
	836	text.append("\nGenerated sets of large itemsets:\n");
	837	if(!m_car){
	838	for (int i = 0; i < m_Ls.size(); i++) {
	839	text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
	840	((FastVector)m_Ls.elementAt(i)).size()+'\n');
	841	if (m_outputItemSets) {
	842	text.append("\nLarge Itemsets L("+(i+1)+"):\n");
	843	for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
	844	text.append(((AprioriItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
	845	toString(m_instances)+"\n");
	846	}
	847	}
	848	text.append("\nBest rules found:\n\n");
	849	for (int i = 0; i < m_allTheRules[0].size(); i++) {
	850	text.append(Utils.doubleToString((double)i+1,
	851	(int)(Math.log(m_numRules)/Math.log(10)+1),0)+
	852	". " + ((AprioriItemSet)m_allTheRules[0].elementAt(i)).
	853	toString(m_instances)
	854	+ " ==> " + ((AprioriItemSet)m_allTheRules[1].elementAt(i)).
	855	toString(m_instances) +" conf:("+
	856	Utils.doubleToString(((Double)m_allTheRules[2].
	857	elementAt(i)).doubleValue(),2)+")");
	858	if (m_metricType != CONFIDENCE \|\| m_significanceLevel != -1) {
	859	text.append((m_metricType == LIFT ? " <" : "")+" lift:("+
	860	Utils.doubleToString(((Double)m_allTheRules[3].
	861	elementAt(i)).doubleValue(),2)
	862	+")"+(m_metricType == LIFT ? ">" : ""));
	863	text.append((m_metricType == LEVERAGE ? " <" : "")+" lev:("+
	864	Utils.doubleToString(((Double)m_allTheRules[4].
	865	elementAt(i)).doubleValue(),2)
	866	+")");
	867	text.append(" ["+
	868	(int)(((Double)m_allTheRules[4].elementAt(i))
	869	.doubleValue() * (double)m_instances.numInstances())
	870	+"]"+(m_metricType == LEVERAGE ? ">" : ""));
	871	text.append((m_metricType == CONVICTION ? " <" : "")+" conv:("+
	872	Utils.doubleToString(((Double)m_allTheRules[5].
	873	elementAt(i)).doubleValue(),2)
	874	+")"+(m_metricType == CONVICTION ? ">" : ""));
	875	}
	876	text.append('\n');
	877	}
	878	}
	879	else{
	880	for (int i = 0; i < m_Ls.size(); i++) {
	881	text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
	882	((FastVector)m_Ls.elementAt(i)).size()+'\n');
	883	if (m_outputItemSets) {
	884	text.append("\nLarge Itemsets L("+(i+1)+"):\n");
	885	for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++){
	886	text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
	887	toString(m_instances)+"\n");
	888	text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).m_classLabel+" ");
	889	text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).support()+"\n");
	890	}
	891	}
	892	}
	893	text.append("\nBest rules found:\n\n");
	894	for (int i = 0; i < m_allTheRules[0].size(); i++) {
	895	text.append(Utils.doubleToString((double)i+1,
	896	(int)(Math.log(m_numRules)/Math.log(10)+1),0)+
	897	". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
	898	toString(m_instances)
	899	+ " ==> " + ((ItemSet)m_allTheRules[1].elementAt(i)).
	900	toString(m_onlyClass) +" conf:("+
	901	Utils.doubleToString(((Double)m_allTheRules[2].
	902	elementAt(i)).doubleValue(),2)+")");
	903
	904	text.append('\n');
	905	}
	906	}
	907	return text.toString();
	908	}
	909
	910	/**
	911	* Returns the metric string for the chosen metric type
	912	* @return a string describing the used metric for the interestingness of a class association rule
	913	*/
	914	public String metricString() {
	915
	916	switch(m_metricType) {
	917	case LIFT:
	918	return "lif";
	919	case LEVERAGE:
	920	return "leverage";
	921	case CONVICTION:
	922	return "conviction";
	923	default:
	924	return "conf";
	925	}
	926	}
	927
	928	/**
	929	* Returns the tip text for this property
	930	* @return tip text for this property suitable for
	931	* displaying in the explorer/experimenter gui
	932	*/
	933	public String removeAllMissingColsTipText() {
	934	return "Remove columns with all missing values.";
	935	}
	936
	937	/**
	938	* Remove columns containing all missing values.
	939	* @param r true if cols are to be removed.
	940	*/
	941	public void setRemoveAllMissingCols(boolean r) {
	942	m_removeMissingCols = r;
	943	}
	944
	945	/**
	946	* Returns whether columns containing all missing values are to be removed
	947	* @return true if columns are to be removed.
	948	*/
	949	public boolean getRemoveAllMissingCols() {
	950	return m_removeMissingCols;
	951	}
	952
	953	/**
	954	* Returns the tip text for this property
	955	* @return tip text for this property suitable for
	956	* displaying in the explorer/experimenter gui
	957	*/
	958	public String upperBoundMinSupportTipText() {
	959	return "Upper bound for minimum support. Start iteratively decreasing "
	960	+"minimum support from this value.";
	961	}
	962
	963	/**
	964	* Get the value of upperBoundMinSupport.
	965	*
	966	* @return Value of upperBoundMinSupport.
	967	*/
	968	public double getUpperBoundMinSupport() {
	969
	970	return m_upperBoundMinSupport;
	971	}
	972
	973	/**
	974	* Set the value of upperBoundMinSupport.
	975	*
	976	* @param v Value to assign to upperBoundMinSupport.
	977	*/
	978	public void setUpperBoundMinSupport(double v) {
	979
	980	m_upperBoundMinSupport = v;
	981	}
	982
	983	/**
	984	* Sets the class index
	985	* @param index the class index
	986	*/
	987	public void setClassIndex(int index){
	988
	989	m_classIndex = index;
	990	}
	991
	992	/**
	993	* Gets the class index
	994	* @return the index of the class attribute
	995	*/
	996	public int getClassIndex(){
	997
	998	return m_classIndex;
	999	}
	1000
	1001	/**
	1002	* Returns the tip text for this property
	1003	* @return tip text for this property suitable for
	1004	* displaying in the explorer/experimenter gui
	1005	*/
	1006	public String classIndexTipText() {
	1007	return "Index of the class attribute. If set to -1, the last attribute is taken as class attribute.";
	1008
	1009	}
	1010
	1011	/**
	1012	* Sets class association rule mining
	1013	* @param flag if class association rules are mined, false otherwise
	1014	*/
	1015	public void setCar(boolean flag){
	1016	m_car = flag;
	1017	}
	1018
	1019	/**
	1020	* Gets whether class association ruels are mined
	1021	* @return true if class association rules are mined, false otherwise
	1022	*/
	1023	public boolean getCar(){
	1024	return m_car;
	1025	}
	1026
	1027	/**
	1028	* Returns the tip text for this property
	1029	* @return tip text for this property suitable for
	1030	* displaying in the explorer/experimenter gui
	1031	*/
	1032	public String carTipText() {
	1033	return "If enabled class association rules are mined instead of (general) association rules.";
	1034	}
	1035
	1036	/**
	1037	* Returns the tip text for this property
	1038	* @return tip text for this property suitable for
	1039	* displaying in the explorer/experimenter gui
	1040	*/
	1041	public String lowerBoundMinSupportTipText() {
	1042	return "Lower bound for minimum support.";
	1043	}
	1044
	1045	/**
	1046	* Get the value of lowerBoundMinSupport.
	1047	*
	1048	* @return Value of lowerBoundMinSupport.
	1049	*/
	1050	public double getLowerBoundMinSupport() {
	1051
	1052	return m_lowerBoundMinSupport;
	1053	}
	1054
	1055	/**
	1056	* Set the value of lowerBoundMinSupport.
	1057	*
	1058	* @param v Value to assign to lowerBoundMinSupport.
	1059	*/
	1060	public void setLowerBoundMinSupport(double v) {
	1061
	1062	m_lowerBoundMinSupport = v;
	1063	}
	1064
	1065	/**
	1066	* Get the metric type
	1067	*
	1068	* @return the type of metric to use for ranking rules
	1069	*/
	1070	public SelectedTag getMetricType() {
	1071	return new SelectedTag(m_metricType, TAGS_SELECTION);
	1072	}
	1073
	1074	/**
	1075	* Returns the tip text for this property
	1076	* @return tip text for this property suitable for
	1077	* displaying in the explorer/experimenter gui
	1078	*/
	1079	public String metricTypeTipText() {
	1080	return "Set the type of metric by which to rank rules. Confidence is "
	1081	+"the proportion of the examples covered by the premise that are also "
	1082	+"covered by the consequence(Class association rules can only be mined using confidence). Lift is confidence divided by the "
	1083	+"proportion of all examples that are covered by the consequence. This "
	1084	+"is a measure of the importance of the association that is independent "
	1085	+"of support. Leverage is the proportion of additional examples covered "
	1086	+"by both the premise and consequence above those expected if the "
	1087	+"premise and consequence were independent of each other. The total "
	1088	+"number of examples that this represents is presented in brackets "
	1089	+"following the leverage. Conviction is "
	1090	+"another measure of departure from independence. Conviction is given "
	1091	+"by ";
	1092	}
	1093
	1094	/**
	1095	* Set the metric type for ranking rules
	1096	*
	1097	* @param d the type of metric
	1098	*/
	1099	public void setMetricType (SelectedTag d) {
	1100
	1101	if (d.getTags() == TAGS_SELECTION) {
	1102	m_metricType = d.getSelectedTag().getID();
	1103	}
	1104
	1105	if (m_significanceLevel != -1 && m_metricType != CONFIDENCE) {
	1106	m_metricType = CONFIDENCE;
	1107	}
	1108
	1109	if (m_metricType == CONFIDENCE) {
	1110	setMinMetric(0.9);
	1111	}
	1112
	1113	if (m_metricType == LIFT \|\| m_metricType == CONVICTION) {
	1114	setMinMetric(1.1);
	1115	}
	1116
	1117	if (m_metricType == LEVERAGE) {
	1118	setMinMetric(0.1);
	1119	}
	1120	}
	1121
	1122	/**
	1123	* Returns the tip text for this property
	1124	* @return tip text for this property suitable for
	1125	* displaying in the explorer/experimenter gui
	1126	*/
	1127	public String minMetricTipText() {
	1128	return "Minimum metric score. Consider only rules with scores higher than "
	1129	+"this value.";
	1130	}
	1131
	1132	/**
	1133	* Get the value of minConfidence.
	1134	*
	1135	* @return Value of minConfidence.
	1136	*/
	1137	public double getMinMetric() {
	1138
	1139	return m_minMetric;
	1140	}
	1141
	1142	/**
	1143	* Set the value of minConfidence.
	1144	*
	1145	* @param v Value to assign to minConfidence.
	1146	*/
	1147	public void setMinMetric(double v) {
	1148
	1149	m_minMetric = v;
	1150	}
	1151
	1152	/**
	1153	* Returns the tip text for this property
	1154	* @return tip text for this property suitable for
	1155	* displaying in the explorer/experimenter gui
	1156	*/
	1157	public String numRulesTipText() {
	1158	return "Number of rules to find.";
	1159	}
	1160
	1161	/**
	1162	* Get the value of numRules.
	1163	*
	1164	* @return Value of numRules.
	1165	*/
	1166	public int getNumRules() {
	1167
	1168	return m_numRules;
	1169	}
	1170
	1171	/**
	1172	* Set the value of numRules.
	1173	*
	1174	* @param v Value to assign to numRules.
	1175	*/
	1176	public void setNumRules(int v) {
	1177
	1178	m_numRules = v;
	1179	}
	1180
	1181	/**
	1182	* Returns the tip text for this property
	1183	* @return tip text for this property suitable for
	1184	* displaying in the explorer/experimenter gui
	1185	*/
	1186	public String deltaTipText() {
	1187	return "Iteratively decrease support by this factor. Reduces support "
	1188	+"until min support is reached or required number of rules has been "
	1189	+"generated.";
	1190	}
	1191
	1192	/**
	1193	* Get the value of delta.
	1194	*
	1195	* @return Value of delta.
	1196	*/
	1197	public double getDelta() {
	1198
	1199	return m_delta;
	1200	}
	1201
	1202	/**
	1203	* Set the value of delta.
	1204	*
	1205	* @param v Value to assign to delta.
	1206	*/
	1207	public void setDelta(double v) {
	1208
	1209	m_delta = v;
	1210	}
	1211
	1212	/**
	1213	* Returns the tip text for this property
	1214	* @return tip text for this property suitable for
	1215	* displaying in the explorer/experimenter gui
	1216	*/
	1217	public String significanceLevelTipText() {
	1218	return "Significance level. Significance test (confidence metric only).";
	1219	}
	1220
	1221	/**
	1222	* Get the value of significanceLevel.
	1223	*
	1224	* @return Value of significanceLevel.
	1225	*/
	1226	public double getSignificanceLevel() {
	1227
	1228	return m_significanceLevel;
	1229	}
	1230
	1231	/**
	1232	* Set the value of significanceLevel.
	1233	*
	1234	* @param v Value to assign to significanceLevel.
	1235	*/
	1236	public void setSignificanceLevel(double v) {
	1237
	1238	m_significanceLevel = v;
	1239	}
	1240
	1241	/**
	1242	* Sets whether itemsets are output as well
	1243	* @param flag true if itemsets are to be output as well
	1244	*/
	1245	public void setOutputItemSets(boolean flag){
	1246	m_outputItemSets = flag;
	1247	}
	1248
	1249	/**
	1250	* Gets whether itemsets are output as well
	1251	* @return true if itemsets are output as well
	1252	*/
	1253	public boolean getOutputItemSets(){
	1254	return m_outputItemSets;
	1255	}
	1256
	1257	/**
	1258	* Returns the tip text for this property
	1259	* @return tip text for this property suitable for
	1260	* displaying in the explorer/experimenter gui
	1261	*/
	1262	public String outputItemSetsTipText() {
	1263	return "If enabled the itemsets are output as well.";
	1264	}
	1265
	1266	/**
	1267	* Sets verbose mode
	1268	* @param flag true if algorithm should be run in verbose mode
	1269	*/
	1270	public void setVerbose(boolean flag){
	1271	m_verbose = flag;
	1272	}
	1273
	1274	/**
	1275	* Gets whether algorithm is run in verbose mode
	1276	* @return true if algorithm is run in verbose mode
	1277	*/
	1278	public boolean getVerbose(){
	1279	return m_verbose;
	1280	}
	1281
	1282	/**
	1283	* Returns the tip text for this property
	1284	* @return tip text for this property suitable for
	1285	* displaying in the explorer/experimenter gui
	1286	*/
	1287	public String verboseTipText() {
	1288	return "If enabled the algorithm will be run in verbose mode.";
	1289	}
	1290
	1291	/**
	1292	* Returns the tip text for this property
	1293	* @return tip text for this property suitable for
	1294	* displaying in the explorer/experimenter gui
	1295	*/
	1296	public String treatZeroAsMissingTipText() {
	1297	return "If enabled, zero (that is, the first value of a nominal) is "
	1298	+ "treated in the same way as a missing value.";
	1299	}
	1300
	1301	/**
	1302	* Sets whether zeros (i.e. the first value of a nominal attribute)
	1303	* should be treated as missing values.
	1304	*
	1305	* @param z true if zeros should be treated as missing values.
	1306	*/
	1307	public void setTreatZeroAsMissing(boolean z) {
	1308	m_treatZeroAsMissing = z;
	1309	}
	1310
	1311	/**
	1312	* Gets whether zeros (i.e. the first value of a nominal attribute)
	1313	* is to be treated int he same way as missing values.
	1314	*
	1315	* @return true if zeros are to be treated like missing values.
	1316	*/
	1317	public boolean getTreatZeroAsMissing() {
	1318	return m_treatZeroAsMissing;
	1319	}
	1320
	1321	/**
	1322	* Method that finds all large itemsets for the given set of instances.
	1323	*
	1324	* @throws Exception if an attribute is numeric
	1325	*/
	1326	private void findLargeItemSets() throws Exception {
	1327
	1328	FastVector kMinusOneSets, kSets;
	1329	Hashtable hashtable;
	1330	int necSupport, necMaxSupport,i = 0;
	1331
	1332
	1333
	1334	// Find large itemsets
	1335
	1336	// minimum support
	1337	necSupport = (int)(m_minSupport * (double)m_instances.numInstances()+0.5);
	1338	necMaxSupport = (int)(m_upperBoundMinSupport * (double)m_instances.numInstances()+0.5);
	1339
	1340	kSets = AprioriItemSet.singletons(m_instances, m_treatZeroAsMissing);
	1341	AprioriItemSet.upDateCounters(kSets,m_instances);
	1342	kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
	1343	if (kSets.size() == 0)
	1344	return;
	1345	do {
	1346	m_Ls.addElement(kSets);
	1347	kMinusOneSets = kSets;
	1348	kSets = AprioriItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
	1349	hashtable = AprioriItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
	1350	m_hashtables.addElement(hashtable);
	1351	kSets = AprioriItemSet.pruneItemSets(kSets, hashtable);
	1352	AprioriItemSet.upDateCounters(kSets, m_instances);
	1353	kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
	1354	i++;
	1355	} while (kSets.size() > 0);
	1356	}
	1357
	1358	/**
	1359	* Method that finds all association rules and performs significance test.
	1360	*
	1361	* @throws Exception if an attribute is numeric
	1362	*/
	1363	private void findRulesBruteForce() throws Exception {
	1364
	1365	FastVector[] rules;
	1366
	1367	// Build rules
	1368	for (int j = 1; j < m_Ls.size(); j++) {
	1369	FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
	1370	Enumeration enumItemSets = currentItemSets.elements();
	1371	while (enumItemSets.hasMoreElements()) {
	1372	AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
	1373	//AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
	1374	rules=currentItemSet.generateRulesBruteForce(m_minMetric,m_metricType,
	1375	m_hashtables,j+1,
	1376	m_instances.numInstances(),
	1377	m_significanceLevel);
	1378	for (int k = 0; k < rules[0].size(); k++) {
	1379	m_allTheRules[0].addElement(rules[0].elementAt(k));
	1380	m_allTheRules[1].addElement(rules[1].elementAt(k));
	1381	m_allTheRules[2].addElement(rules[2].elementAt(k));
	1382
	1383	m_allTheRules[3].addElement(rules[3].elementAt(k));
	1384	m_allTheRules[4].addElement(rules[4].elementAt(k));
	1385	m_allTheRules[5].addElement(rules[5].elementAt(k));
	1386	}
	1387	}
	1388	}
	1389	}
	1390
	1391	/**
	1392	* Method that finds all association rules.
	1393	*
	1394	* @throws Exception if an attribute is numeric
	1395	*/
	1396	private void findRulesQuickly() throws Exception {
	1397
	1398	FastVector[] rules;
	1399
	1400	// Build rules
	1401	for (int j = 1; j < m_Ls.size(); j++) {
	1402	FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
	1403	Enumeration enumItemSets = currentItemSets.elements();
	1404	while (enumItemSets.hasMoreElements()) {
	1405	AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
	1406	//AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
	1407	rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
	1408	for (int k = 0; k < rules[0].size(); k++) {
	1409	m_allTheRules[0].addElement(rules[0].elementAt(k));
	1410	m_allTheRules[1].addElement(rules[1].elementAt(k));
	1411	m_allTheRules[2].addElement(rules[2].elementAt(k));
	1412	}
	1413	}
	1414	}
	1415	}
	1416
	1417	/**
	1418	*
	1419	* Method that finds all large itemsets for class association rules for the given set of instances.
	1420	* @throws Exception if an attribute is numeric
	1421	*/
	1422	private void findLargeCarItemSets() throws Exception {
	1423
	1424	FastVector kMinusOneSets, kSets;
	1425	Hashtable hashtable;
	1426	int necSupport, necMaxSupport,i = 0;
	1427
	1428	// Find large itemsets
	1429
	1430	// minimum support
	1431	double nextMinSupport = m_minSupport*(double)m_instances.numInstances();
	1432	double nextMaxSupport = m_upperBoundMinSupport*(double)m_instances.numInstances();
	1433	if((double)Math.rint(nextMinSupport) == nextMinSupport){
	1434	necSupport = (int) nextMinSupport;
	1435	}
	1436	else{
	1437	necSupport = Math.round((float)(nextMinSupport+0.5));
	1438	}
	1439	if((double)Math.rint(nextMaxSupport) == nextMaxSupport){
	1440	necMaxSupport = (int) nextMaxSupport;
	1441	}
	1442	else{
	1443	necMaxSupport = Math.round((float)(nextMaxSupport+0.5));
	1444	}
	1445
	1446	//find item sets of length one
	1447	kSets = LabeledItemSet.singletons(m_instances,m_onlyClass);
	1448	LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
	1449
	1450	//check if a item set of lentgh one is frequent, if not delete it
	1451	kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
	1452	if (kSets.size() == 0)
	1453	return;
	1454	do {
	1455	m_Ls.addElement(kSets);
	1456	kMinusOneSets = kSets;
	1457	kSets = LabeledItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
	1458	hashtable = LabeledItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
	1459	kSets = LabeledItemSet.pruneItemSets(kSets, hashtable);
	1460	LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
	1461	kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
	1462	i++;
	1463	} while (kSets.size() > 0);
	1464	}
	1465
	1466
	1467
	1468	/**
	1469	* Method that finds all class association rules.
	1470	*
	1471	* @throws Exception if an attribute is numeric
	1472	*/
	1473	private void findCarRulesQuickly() throws Exception {
	1474
	1475	FastVector[] rules;
	1476
	1477	// Build rules
	1478	for (int j = 0; j < m_Ls.size(); j++) {
	1479	FastVector currentLabeledItemSets = (FastVector)m_Ls.elementAt(j);
	1480	Enumeration enumLabeledItemSets = currentLabeledItemSets.elements();
	1481	while (enumLabeledItemSets.hasMoreElements()) {
	1482	LabeledItemSet currentLabeledItemSet = (LabeledItemSet)enumLabeledItemSets.nextElement();
	1483	rules = currentLabeledItemSet.generateRules(m_minMetric,false);
	1484	for (int k = 0; k < rules[0].size(); k++) {
	1485	m_allTheRules[0].addElement(rules[0].elementAt(k));
	1486	m_allTheRules[1].addElement(rules[1].elementAt(k));
	1487	m_allTheRules[2].addElement(rules[2].elementAt(k));
	1488	}
	1489	}
	1490	}
	1491	}
	1492
	1493	/**
	1494	* returns all the rules
	1495	*
	1496	* @return all the rules
	1497	* @see #m_allTheRules
	1498	*/
	1499	public FastVector[] getAllTheRules() {
	1500	return m_allTheRules;
	1501	}
	1502
	1503	/**
	1504	* Returns the revision string.
	1505	*
	1506	* @return the revision
	1507	*/
	1508	public String getRevision() {
	1509	return RevisionUtils.extract("$Revision: 5698 $");
	1510	}
	1511
	1512	/**
	1513	* Main method.
	1514	*
	1515	* @param args the commandline options
	1516	*/
	1517	public static void main(String[] args) {
	1518	runAssociator(new Apriori(), args);
	1519	}
	1520	}
	1521

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/associations/Apriori.java

Download in other formats: