source: src/main/java/weka/associations/Apriori.java @ 20

Last change on this file since 20 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 48.2 KB
Rev  Line 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Apriori.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.associations;
24
25import weka.core.AttributeStats;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.RevisionUtils;
32import weka.core.SelectedTag;
33import weka.core.Tag;
34import weka.core.TechnicalInformation;
35import weka.core.TechnicalInformationHandler;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.core.TechnicalInformation.Field;
39import weka.core.TechnicalInformation.Type;
40import weka.filters.Filter;
41import weka.filters.unsupervised.attribute.Remove;
42
43import java.util.Enumeration;
44import java.util.Hashtable;
45
46/**
47 <!-- globalinfo-start -->
48 * Class implementing an Apriori-type algorithm. Iteratively reduces the minimum support until it finds the required number of rules with the given minimum confidence.<br/>
49 * The algorithm has an option to mine class association rules. It is adapted as explained in the second reference.<br/>
50 * <br/>
51 * For more information see:<br/>
52 * <br/>
53 * R. Agrawal, R. Srikant: Fast Algorithms for Mining Association Rules in Large Databases. In: 20th International Conference on Very Large Data Bases, 478-499, 1994.<br/>
54 * <br/>
55 * Bing Liu, Wynne Hsu, Yiming Ma: Integrating Classification and Association Rule Mining. In: Fourth International Conference on Knowledge Discovery and Data Mining, 80-86, 1998.
56 * <p/>
57 <!-- globalinfo-end -->
58 *
59 <!-- technical-bibtex-start -->
60 * BibTeX:
61 * <pre>
62 * &#64;inproceedings{Agrawal1994,
63 *    author = {R. Agrawal and R. Srikant},
64 *    booktitle = {20th International Conference on Very Large Data Bases},
65 *    pages = {478-499},
66 *    publisher = {Morgan Kaufmann, Los Altos, CA},
67 *    title = {Fast Algorithms for Mining Association Rules in Large Databases},
68 *    year = {1994}
69 * }
70 *
71 * &#64;inproceedings{Liu1998,
72 *    author = {Bing Liu and Wynne Hsu and Yiming Ma},
73 *    booktitle = {Fourth International Conference on Knowledge Discovery and Data Mining},
74 *    pages = {80-86},
75 *    publisher = {AAAI Press},
76 *    title = {Integrating Classification and Association Rule Mining},
77 *    year = {1998}
78 * }
79 * </pre>
80 * <p/>
81 <!-- technical-bibtex-end -->
82 *
83 <!-- options-start -->
84 * Valid options are: <p/>
85 *
86 * <pre> -N &lt;required number of rules output&gt;
87 *  The required number of rules. (default = 10)</pre>
88 *
89 * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
90 *  The metric type by which to rank rules. (default = confidence)</pre>
91 *
92 * <pre> -C &lt;minimum metric score of a rule&gt;
93 *  The minimum confidence of a rule. (default = 0.9)</pre>
94 *
95 * <pre> -D &lt;delta for minimum support&gt;
96 *  The delta by which the minimum support is decreased in
97 *  each iteration. (default = 0.05)</pre>
98 *
99 * <pre> -U &lt;upper bound for minimum support&gt;
100 *  Upper bound for minimum support. (default = 1.0)</pre>
101 *
102 * <pre> -M &lt;lower bound for minimum support&gt;
103 *  The lower bound for the minimum support. (default = 0.1)</pre>
104 *
105 * <pre> -S &lt;significance level&gt;
106 *  If used, rules are tested for significance at
107 *  the given level. Slower. (default = no significance testing)</pre>
108 *
109 * <pre> -I
110 *  If set the itemsets found are also output. (default = no)</pre>
111 *
112 * <pre> -R
113 *  Remove columns that contain all missing values (default = no)</pre>
114 *
115 * <pre> -V
116 *  Report progress iteratively. (default = no)</pre>
117 *
118 * <pre> -A
119 *  If set class association rules are mined. (default = no)</pre>
120 *
121 * <pre> -c &lt;the class index&gt;
122 *  The class index. (default = last)</pre>
123 *
124 <!-- options-end -->
125 *
126 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
127 * @author Mark Hall (mhall@cs.waikato.ac.nz)
128 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
129 * @version $Revision: 5698 $
130 */
131public class Apriori 
132  extends AbstractAssociator
133  implements OptionHandler, CARuleMiner, TechnicalInformationHandler {
134 
135  /** for serialization */
136  static final long serialVersionUID = 3277498842319212687L;
137 
138  /** The minimum support. */
139  protected double m_minSupport;
140
141  /** The upper bound on the support */
142  protected double m_upperBoundMinSupport;
143
144  /** The lower bound for the minimum support. */
145  protected double m_lowerBoundMinSupport;
146
147  /** Metric type: Confidence */
148  protected static final int CONFIDENCE = 0;
149  /** Metric type: Lift */
150  protected static final int LIFT = 1;
151  /** Metric type: Leverage */
152  protected static final int LEVERAGE = 2;
153  /** Metric type: Conviction */
154  protected static final int CONVICTION = 3;
155  /** Metric types. */
156  public static final Tag [] TAGS_SELECTION = {
157    new Tag(CONFIDENCE, "Confidence"),
158    new Tag(LIFT, "Lift"),
159    new Tag(LEVERAGE, "Leverage"),
160    new Tag(CONVICTION, "Conviction")
161      };
162
163  /** The selected metric type. */
164  protected int m_metricType = CONFIDENCE;
165
166  /** The minimum metric score. */
167  protected double m_minMetric;
168
169  /** The maximum number of rules that are output. */
170  protected int m_numRules;
171
172  /** Delta by which m_minSupport is decreased in each iteration. */
173  protected double m_delta;
174
175  /** Significance level for optional significance test. */
176  protected double m_significanceLevel;
177
178  /** Number of cycles used before the required number of rules was found. */
179  protected int m_cycles;
180
181  /** The set of all sets of itemsets L. */
182  protected FastVector m_Ls;
183
184  /** The same information stored in hash tables. */
185  protected FastVector m_hashtables;
186
187  /** The list of all generated rules. */
188  protected FastVector[] m_allTheRules;
189
190  /** The instances (transactions) to be used for generating
191      the association rules. */
192  protected Instances m_instances;
193
194  /** Output itemsets found? */
195  protected boolean m_outputItemSets;
196
197  /** Remove columns with all missing values */
198  protected boolean m_removeMissingCols;
199
200  /** Report progress iteratively */
201  protected boolean m_verbose;
202 
203  /** Only the class attribute of all Instances.*/
204  protected Instances m_onlyClass;
205 
206  /** The class index. */ 
207  protected int m_classIndex;
208 
209  /** Flag indicating whether class association rules are mined. */
210  protected boolean m_car;
211 
212  /**
213   * Treat zeros as missing (rather than a value in their
214   * own right)
215   */
216  protected boolean m_treatZeroAsMissing = false;
217
218  /**
219   * Returns a string describing this associator
220   * @return a description of the associator suitable for
221   * displaying in the explorer/experimenter gui
222   */
223  public String globalInfo() {
224    return 
225        "Class implementing an Apriori-type algorithm. Iteratively reduces "
226      + "the minimum support until it finds the required number of rules with "
227      + "the given minimum confidence.\n"
228      + "The algorithm has an option to mine class association rules. It is "
229      + "adapted as explained in the second reference.\n\n"
230      + "For more information see:\n\n"
231      + getTechnicalInformation().toString();
232  }
233 
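  // Illustrative sketch (not part of the original source): typical
  // programmatic use of this associator, assuming "data" is a set of
  // purely nominal Instances loaded elsewhere.
  //
  //   Apriori apriori = new Apriori();
  //   apriori.setNumRules(20);           // -N 20
  //   apriori.setMinMetric(0.9);         // -C 0.9 (minimum confidence)
  //   apriori.buildAssociations(data);
  //   System.out.println(apriori);       // prints the best rules found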
234  /**
235   * Returns an instance of a TechnicalInformation object, containing
236   * detailed information about the technical background of this class,
237   * e.g., paper reference or book this class is based on.
238   *
239   * @return the technical information about this class
240   */
241  public TechnicalInformation getTechnicalInformation() {
242    TechnicalInformation        result;
243    TechnicalInformation        additional;
244   
245    result = new TechnicalInformation(Type.INPROCEEDINGS);
246    result.setValue(Field.AUTHOR, "R. Agrawal and R. Srikant");
247    result.setValue(Field.TITLE, "Fast Algorithms for Mining Association Rules in Large Databases");
248    result.setValue(Field.BOOKTITLE, "20th International Conference on Very Large Data Bases");
249    result.setValue(Field.YEAR, "1994");
250    result.setValue(Field.PAGES, "478-499");
251    result.setValue(Field.PUBLISHER, "Morgan Kaufmann, Los Altos, CA");
252
253    additional = result.add(Type.INPROCEEDINGS);
254    additional.setValue(Field.AUTHOR, "Bing Liu and Wynne Hsu and Yiming Ma");
255    additional.setValue(Field.TITLE, "Integrating Classification and Association Rule Mining");
256    additional.setValue(Field.BOOKTITLE, "Fourth International Conference on Knowledge Discovery and Data Mining");
257    additional.setValue(Field.YEAR, "1998");
258    additional.setValue(Field.PAGES, "80-86");
259    additional.setValue(Field.PUBLISHER, "AAAI Press");
260   
261    return result;
262  }
263
264  /**
265   * Constructor that sets default values for the
266   * minimum confidence and the maximum number of
267   * rules.
268   */
269  public Apriori() {
270
271    resetOptions();
272  }
273
274  /**
275   * Resets the options to the default values.
276   */
277  public void resetOptions() {
278   
279    m_removeMissingCols = false;
280    m_verbose = false;
281    m_delta = 0.05;
282    m_minMetric = 0.90;
283    m_numRules = 10;
284    m_lowerBoundMinSupport = 0.1;
285    m_upperBoundMinSupport = 1.0;
286    m_significanceLevel = -1;
287    m_outputItemSets = false;
288    m_car = false;
289    m_classIndex = -1;
290  }
291
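  // For reference, the defaults set above correspond to the command line
  //   -N 10 -T 0 -C 0.9 -D 0.05 -U 1.0 -M 0.1
  // (no significance testing, no itemset output, no CAR mining).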
292  /**
293   * Removes columns that are all missing from the data
294   * @param instances the instances
295   * @return a new set of instances with all missing columns removed
296   * @throws Exception if something goes wrong
297   */
298  protected Instances removeMissingColumns(Instances instances) 
299    throws Exception {
300   
301    int numInstances = instances.numInstances();
302    StringBuffer deleteString = new StringBuffer();
303    int removeCount = 0;
304    boolean first = true;
305    int maxCount = 0;
306   
307    for (int i=0;i<instances.numAttributes();i++) {
308      AttributeStats as = instances.attributeStats(i);
309      if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
310        // see if we can decrease this by looking for the most frequent value
311        int [] counts = as.nominalCounts;
312        if (counts[Utils.maxIndex(counts)] > maxCount) {
313          maxCount = counts[Utils.maxIndex(counts)];
314        }
315      }
316      if (as.missingCount == numInstances) {
317        if (first) {
318          deleteString.append((i+1));
319          first = false;
320        } else {
321          deleteString.append(","+(i+1));
322        }
323        removeCount++;
324      }
325    }
326    if (m_verbose) {
327      System.err.println("Removed : "+removeCount+" columns with all missing "
328                         +"values.");
329    }
330    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
331      m_upperBoundMinSupport = (double)maxCount / (double)numInstances;
332      if (m_verbose) {
333        System.err.println("Setting upper bound min support to : "
334                           +m_upperBoundMinSupport);
335      }
336    }
337
338    if (deleteString.toString().length() > 0) {
339      Remove af = new Remove();
340      af.setAttributeIndices(deleteString.toString());
341      af.setInvertSelection(false);
342      af.setInputFormat(instances);
343      Instances newInst = Filter.useFilter(instances, af);
344
345      return newInst;
346    }
347    return instances;
348  }
349
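  // Illustrative sketch (not part of the original source): the same
  // Remove-filter pattern used above, applied on its own. "data" and the
  // attribute indices are hypothetical.
  //
  //   Remove remove = new Remove();
  //   remove.setAttributeIndices("3,7");    // 1-based indices to delete
  //   remove.setInvertSelection(false);
  //   remove.setInputFormat(data);
  //   Instances filtered = Filter.useFilter(data, remove);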
350  /**
351   * Returns default capabilities of the classifier.
352   *
353   * @return      the capabilities of this classifier
354   */
355  public Capabilities getCapabilities() {
356    Capabilities result = super.getCapabilities();
357    result.disableAll();
358
359    // enable what we can handle
360   
361    // attributes
362    result.enable(Capability.NOMINAL_ATTRIBUTES);
363    result.enable(Capability.MISSING_VALUES);
364
365    // class (can handle a nominal class if CAR rules are selected)
366    result.enable(Capability.NO_CLASS);
367    result.enable(Capability.NOMINAL_CLASS);
368    result.enable(Capability.MISSING_CLASS_VALUES);
369   
370    return result;
371  }
372
373  /**
374   * Method that generates all large itemsets with a minimum support, and from
375   * these all association rules with a minimum confidence.
376   *
377   * @param instances the instances to be used for generating the associations
378   * @throws Exception if rules can't be built successfully
379   */
380  public void buildAssociations(Instances instances) throws Exception {
381
382    double[] confidences, supports;
383    int[] indices;
384    FastVector[] sortedRuleSet;
385    double necSupport=0;
386
387    instances = new Instances(instances);
388   
389    if (m_removeMissingCols) {
390      instances = removeMissingColumns(instances);
391    }
392    if(m_car && m_metricType != CONFIDENCE)
393      throw new Exception("For CAR-Mining metric type has to be confidence!");
394   
395    // only set class index if CAR is requested
396    if (m_car) {
397      if (m_classIndex == -1 ) {
398        instances.setClassIndex(instances.numAttributes()-1);     
399      } else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
400        instances.setClassIndex(m_classIndex - 1);
401      } else {
402        throw new Exception("Invalid class index.");
403      }
404    }
405
406    // can associator handle the data?
407    getCapabilities().testWithFail(instances);
408
409    m_cycles = 0;
410   
411    // make sure that the lower bound is equal to at least one instance
412    double lowerBoundMinSupportToUse = 
413      (m_lowerBoundMinSupport * (double)instances.numInstances() < 1.0)
414      ? 1.0 / (double)instances.numInstances()
415          : m_lowerBoundMinSupport;
416   
417    if(m_car){
418        //m_instances does not contain the class attribute
419        m_instances = LabeledItemSet.divide(instances,false);
420   
421        //m_onlyClass contains only the class attribute
422        m_onlyClass = LabeledItemSet.divide(instances,true);
423    }
424    else
425        m_instances = instances;
426   
427    if(m_car && m_numRules == Integer.MAX_VALUE){
428        // Set desired minimum support
429        m_minSupport = lowerBoundMinSupportToUse;
430    }
431    else{
432        // Decrease minimum support until desired number of rules found.
433        m_minSupport = m_upperBoundMinSupport - m_delta;
434        m_minSupport = (m_minSupport < lowerBoundMinSupportToUse) 
435            ? lowerBoundMinSupportToUse
436            : m_minSupport;
437    }
438
439    do {
440
441      // Reserve space for variables
442      m_Ls = new FastVector();
443      m_hashtables = new FastVector();
444      m_allTheRules = new FastVector[6];
445      m_allTheRules[0] = new FastVector();
446      m_allTheRules[1] = new FastVector();
447      m_allTheRules[2] = new FastVector();
448      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
449        m_allTheRules[3] = new FastVector();
450        m_allTheRules[4] = new FastVector();
451        m_allTheRules[5] = new FastVector();
452      }
453      sortedRuleSet = new FastVector[6];
454      sortedRuleSet[0] = new FastVector();
455      sortedRuleSet[1] = new FastVector();
456      sortedRuleSet[2] = new FastVector();
457      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
458        sortedRuleSet[3] = new FastVector();
459        sortedRuleSet[4] = new FastVector();
460        sortedRuleSet[5] = new FastVector();
461      }
462      if(!m_car){
463        // Find large itemsets and rules
464        findLargeItemSets();
465        if (m_significanceLevel != -1 || m_metricType != CONFIDENCE) 
466            findRulesBruteForce();
467        else
468            findRulesQuickly();
469      }
470      else{
471          findLargeCarItemSets();
472          findCarRulesQuickly();
473      }
474     
475      // Sort rules according to their support
476      /*supports = new double[m_allTheRules[2].size()];
477      for (int i = 0; i < m_allTheRules[2].size(); i++)
478        supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
479      indices = Utils.stableSort(supports);
480      for (int i = 0; i < m_allTheRules[2].size(); i++) {
481        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
482        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
483        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
484        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
485          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
486          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
487          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
488        }
489      }*/
490      int j = m_allTheRules[2].size()-1;
491      supports = new double[m_allTheRules[2].size()];
492      for (int i = 0; i < (j+1); i++) 
493        supports[j-i] = ((double)((ItemSet)m_allTheRules[1].elementAt(j-i)).support())*(-1);
494      indices = Utils.stableSort(supports);
495      for (int i = 0; i < (j+1); i++) {
496        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j-i]));
497        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j-i]));
498        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j-i]));
499        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
500          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j-i]));
501          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j-i]));
502          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j-i]));
503        }
504      }
505
506      // Sort rules according to their confidence
507      m_allTheRules[0].removeAllElements();
508      m_allTheRules[1].removeAllElements();
509      m_allTheRules[2].removeAllElements();
510      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
511        m_allTheRules[3].removeAllElements();
512        m_allTheRules[4].removeAllElements();
513        m_allTheRules[5].removeAllElements();
514      }
515      confidences = new double[sortedRuleSet[2].size()];
516      int sortType = 2 + m_metricType;
517
518      for (int i = 0; i < sortedRuleSet[2].size(); i++) 
519        confidences[i] = 
520          ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
521      indices = Utils.stableSort(confidences);
522      for (int i = sortedRuleSet[0].size() - 1; 
523           (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
524        m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
525        m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
526        m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
527        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
528          m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
529          m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
530          m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
531        }
532      }
533
534      if (m_verbose) {
535        if (m_Ls.size() > 1) {
536          System.out.println(toString());
537        }
538      }
539      if(m_minSupport == lowerBoundMinSupportToUse || m_minSupport - m_delta >  lowerBoundMinSupportToUse)
540        m_minSupport -= m_delta;
541      else
542        m_minSupport = lowerBoundMinSupportToUse;
543     
544     
545      necSupport = Math.rint(m_minSupport * (double)m_instances.numInstances());
546
547      m_cycles++;
548    } while ((m_allTheRules[0].size() < m_numRules) &&
549             (Utils.grOrEq(m_minSupport, lowerBoundMinSupportToUse))
550             /*      (necSupport >= lowerBoundNumInstancesSupport)*/
551             /*      (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ &&     
552             (necSupport >= 1));
553    m_minSupport += m_delta;
554  }
555 
556 
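  // Worked example of the loop above, using the default settings: with
  // upperBoundMinSupport = 1.0 and delta = 0.05, rules are first mined at
  // minSupport = 0.95, then 0.90, 0.85, ... . The loop stops once
  // m_numRules rules have been collected or the support would fall below
  // lowerBoundMinSupport (0.1 by default); the final "m_minSupport += m_delta"
  // restores the support level at which the reported rules were mined.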
557      /**
558     * Method that mines all class association rules with minimum support and
559     * with a minimum confidence.
560     * @return a sorted array of FastVector (sorted by confidence) containing the rules and metric information
561     * @param data the instances for which class association rules should be mined
562     * @throws Exception if rules can't be built successfully
563     */
564    public FastVector[] mineCARs(Instances data) throws Exception{
565         
566        m_car = true;
567        buildAssociations(data);
568        return m_allTheRules;
569    }
570
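  // Illustrative sketch (not part of the original source): mining class
  // association rules directly. Assumes "data" has a nominal class
  // attribute as its last attribute (the default class index of -1).
  //
  //   Apriori apriori = new Apriori();
  //   FastVector[] carRules = apriori.mineCARs(new Instances(data));
  //   // carRules[0] = premises, carRules[1] = consequences, carRules[2] = confidences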
571   /**
572   * Gets the instances without the class attribute.
573   *
574   * @return the instances without the class attribute.
575   */ 
576  public Instances getInstancesNoClass() {
577     
578      return m_instances;
579  } 
580 
581 
582  /**
583   * Gets only the class attribute of the instances.
584   *
585   * @return the class attribute of all instances.
586   */ 
587  public Instances getInstancesOnlyClass() {
588     
589      return m_onlyClass;
590  } 
591
592
593  /**
594   * Returns an enumeration describing the available options.
595   *
596   * @return an enumeration of all the available options.
597   */
598  public Enumeration listOptions() {
599
600    String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
601      string2 = 
602      "\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
603      string3 = "\tThe delta by which the minimum support is decreased in\n",
604      string4 = "\teach iteration. (default = " + m_delta + ")",
605      string5 = 
606      "\tThe lower bound for the minimum support. (default = " + 
607      m_lowerBoundMinSupport + ")",
608      string6 = "\tIf used, rules are tested for significance at\n",
609      string7 = "\tthe given level. Slower. (default = no significance testing)",
610      string8 = "\tIf set the itemsets found are also output. (default = no)",
611      string9 = "\tIf set class association rules are mined. (default = no)",
612      string10 = "\tThe class index. (default = last)",
613      stringType = "\tThe metric type by which to rank rules. (default = "
614      +"confidence)",
615      stringZeroAsMissing = "\tTreat zero (i.e. first value of nominal attributes) as " +
616                "missing";
617   
618
619    FastVector newVector = new FastVector(11);
620
621    newVector.addElement(new Option(string1, "N", 1, 
622                                    "-N <required number of rules output>"));
623    newVector.addElement(new Option(stringType, "T", 1,
624                                    "-T <0=confidence | 1=lift | "
625                                    +"2=leverage | 3=Conviction>"));
626    newVector.addElement(new Option(string2, "C", 1, 
627                                    "-C <minimum metric score of a rule>"));
628    newVector.addElement(new Option(string3 + string4, "D", 1,
629                                    "-D <delta for minimum support>"));
630    newVector.addElement(new Option("\tUpper bound for minimum support. "
631                                    +"(default = 1.0)", "U", 1,
632                                     "-U <upper bound for minimum support>"));
633    newVector.addElement(new Option(string5, "M", 1,
634                                    "-M <lower bound for minimum support>"));
635    newVector.addElement(new Option(string6 + string7, "S", 1,
636                                    "-S <significance level>"));
637    newVector.addElement(new Option(string8, "I", 0,
638                                    "-I"));
639    newVector.addElement(new Option("\tRemove columns that contain "
640                                    +"all missing values (default = no)"
641                                    , "R", 0,
642                                    "-R"));
643    newVector.addElement(new Option("\tReport progress iteratively. (default "
644                                    +"= no)", "V", 0,
645                                    "-V"));
646    newVector.addElement(new Option(string9, "A", 0,
647                                    "-A"));
648    newVector.addElement(new Option(stringZeroAsMissing, "Z", 0,
649        "-Z"));
650    newVector.addElement(new Option(string10, "c", 1,
651                                    "-c <the class index>"));
652   
653    return newVector.elements();
654  }
655
656  /**
657   * Parses a given list of options. <p/>
658   *
659   <!-- options-start -->
660   * Valid options are: <p/>
661   *
662   * <pre> -N &lt;required number of rules output&gt;
663   *  The required number of rules. (default = 10)</pre>
664   *
665   * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
666   *  The metric type by which to rank rules. (default = confidence)</pre>
667   *
668   * <pre> -C &lt;minimum metric score of a rule&gt;
669   *  The minimum confidence of a rule. (default = 0.9)</pre>
670   *
671   * <pre> -D &lt;delta for minimum support&gt;
672   *  The delta by which the minimum support is decreased in
673   *  each iteration. (default = 0.05)</pre>
674   *
675   * <pre> -U &lt;upper bound for minimum support&gt;
676   *  Upper bound for minimum support. (default = 1.0)</pre>
677   *
678   * <pre> -M &lt;lower bound for minimum support&gt;
679   *  The lower bound for the minimum support. (default = 0.1)</pre>
680   *
681   * <pre> -S &lt;significance level&gt;
682   *  If used, rules are tested for significance at
683   *  the given level. Slower. (default = no significance testing)</pre>
684   *
685   * <pre> -I
686   *  If set the itemsets found are also output. (default = no)</pre>
687   *
688   * <pre> -R
689   *  Remove columns that contain all missing values (default = no)</pre>
690   *
691   * <pre> -V
692   *  Report progress iteratively. (default = no)</pre>
693   *
694   * <pre> -A
695   *  If set class association rules are mined. (default = no)</pre>
696   *
697   * <pre> -c &lt;the class index&gt;
698   *  The class index. (default = last)</pre>
699   *
700   <!-- options-end -->
701   *
702   * @param options the list of options as an array of strings
703   * @throws Exception if an option is not supported
704   */
705  public void setOptions(String[] options) throws Exception {
706   
707    resetOptions();
708    String numRulesString = Utils.getOption('N', options),
709      minConfidenceString = Utils.getOption('C', options),
710      deltaString = Utils.getOption('D', options),
711      maxSupportString = Utils.getOption('U', options),
712      minSupportString = Utils.getOption('M', options),
713      significanceLevelString = Utils.getOption('S', options),
714      classIndexString = Utils.getOption('c',options);
715   
716    String metricTypeString = Utils.getOption('T', options);
717    if (metricTypeString.length() != 0) {
718      setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
719                                    TAGS_SELECTION));
720    }
721   
722    if (numRulesString.length() != 0) {
723      m_numRules = Integer.parseInt(numRulesString);
724    }
725    if (classIndexString.length() != 0) {
726      if (classIndexString.equalsIgnoreCase("last")) {
727        m_classIndex = -1;
728      } else if (classIndexString.equalsIgnoreCase("first")) {
729        m_classIndex = 0;
730      } else {
731        m_classIndex = Integer.parseInt(classIndexString);
732      }
733    }
734    if (minConfidenceString.length() != 0) {
735      m_minMetric = (new Double(minConfidenceString)).doubleValue();
736    }
737    if (deltaString.length() != 0) {
738      m_delta = (new Double(deltaString)).doubleValue();
739    }
740    if (maxSupportString.length() != 0) {
741      setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
742    }
743    if (minSupportString.length() != 0) {
744      m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
745    }
746    if (significanceLevelString.length() != 0) {
747      m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
748    }
749    m_outputItemSets = Utils.getFlag('I', options);
750    m_car = Utils.getFlag('A', options);
751    m_verbose = Utils.getFlag('V', options);
752    m_treatZeroAsMissing = Utils.getFlag('Z', options);
753   
754    setRemoveAllMissingCols(Utils.getFlag('R', options));
755  }
756
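  // Illustrative sketch (not part of the original source): the options
  // parsed above can also be supplied as a single string via
  // weka.core.Utils.splitOptions.
  //
  //   Apriori apriori = new Apriori();
  //   apriori.setOptions(Utils.splitOptions("-N 20 -C 0.8 -M 0.2 -I"));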
757  /**
758   * Gets the current settings of the Apriori object.
759   *
760   * @return an array of strings suitable for passing to setOptions
761   */
762  public String [] getOptions() {
763
764    String [] options = new String [21];
765    int current = 0;
766
767    if (m_outputItemSets) {
768      options[current++] = "-I";
769    }
770
771    if (getRemoveAllMissingCols()) {
772      options[current++] = "-R";
773    }
774
775    options[current++] = "-N"; options[current++] = "" + m_numRules;
776    options[current++] = "-T"; options[current++] = "" + m_metricType;
777    options[current++] = "-C"; options[current++] = "" + m_minMetric;
778    options[current++] = "-D"; options[current++] = "" + m_delta;
779    options[current++] = "-U"; options[current++] = "" + m_upperBoundMinSupport;
780    options[current++] = "-M"; options[current++] = "" + m_lowerBoundMinSupport;
781    options[current++] = "-S"; options[current++] = "" + m_significanceLevel;
782    if (m_car)
783      options[current++] = "-A";
784    if (m_verbose)
785      options[current++] = "-V";
786   
787    if (m_treatZeroAsMissing) {
788      options[current++] = "-Z";
789    }
790    options[current++] = "-c"; options[current++] = "" + m_classIndex;
791   
792    while (current < options.length) {
793      options[current++] = "";
794    }
795    return options;
796  }
797
798  /**
799   * Outputs the size of all the generated sets of itemsets and the rules.
800   *
801   * @return a string representation of the model
802   */
803  public String toString() {
804
805    StringBuffer text = new StringBuffer();
806
807    if (m_Ls.size() <= 1)
808      return "\nNo large itemsets and rules found!\n";
809    text.append("\nApriori\n=======\n\n");
810    text.append("Minimum support: " 
811                + Utils.doubleToString(m_minSupport,2) 
812                + " (" + ((int)(m_minSupport * (double)m_instances.numInstances()+0.5)) 
813                + " instances)"
814                + '\n');
815    text.append("Minimum metric <");
816    switch(m_metricType) {
817    case CONFIDENCE:
818      text.append("confidence>: ");
819      break;
820    case LIFT:
821      text.append("lift>: ");
822      break;
823    case LEVERAGE:
824      text.append("leverage>: ");
825      break;
826    case CONVICTION:
827      text.append("conviction>: ");
828      break;
829    }
830    text.append(Utils.doubleToString(m_minMetric,2)+'\n');
831   
832    if (m_significanceLevel != -1)
833      text.append("Significance level: "+
834                  Utils.doubleToString(m_significanceLevel,2)+'\n');
835    text.append("Number of cycles performed: " + m_cycles+'\n');
836    text.append("\nGenerated sets of large itemsets:\n");
837    if(!m_car){
838        for (int i = 0; i < m_Ls.size(); i++) {
839            text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
840                  ((FastVector)m_Ls.elementAt(i)).size()+'\n');
841            if (m_outputItemSets) {
842                text.append("\nLarge Itemsets L("+(i+1)+"):\n");
843                for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
844                    text.append(((AprioriItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
845                      toString(m_instances)+"\n");
846            }
847        }
848        text.append("\nBest rules found:\n\n");
849        for (int i = 0; i < m_allTheRules[0].size(); i++) {
850            text.append(Utils.doubleToString((double)i+1, 
851                  (int)(Math.log(m_numRules)/Math.log(10)+1),0)+
852                  ". " + ((AprioriItemSet)m_allTheRules[0].elementAt(i)).
853                  toString(m_instances) 
854                  + " ==> " + ((AprioriItemSet)m_allTheRules[1].elementAt(i)).
855                  toString(m_instances) +"    conf:("+ 
856                  Utils.doubleToString(((Double)m_allTheRules[2].
857                                        elementAt(i)).doubleValue(),2)+")");
858            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
859                text.append((m_metricType == LIFT ? " <" : "")+" lift:("+ 
860                    Utils.doubleToString(((Double)m_allTheRules[3].
861                                          elementAt(i)).doubleValue(),2)
862                    +")"+(m_metricType == LIFT ? ">" : ""));
863                text.append((m_metricType == LEVERAGE ? " <" : "")+" lev:("+ 
864                    Utils.doubleToString(((Double)m_allTheRules[4].
865                                          elementAt(i)).doubleValue(),2)
866                    +")");
867                text.append(" ["+
868                    (int)(((Double)m_allTheRules[4].elementAt(i))
869                          .doubleValue() * (double)m_instances.numInstances())
870                    +"]"+(m_metricType == LEVERAGE ? ">" : ""));
871                text.append((m_metricType == CONVICTION ? " <" : "")+" conv:("+ 
872                    Utils.doubleToString(((Double)m_allTheRules[5].
873                                          elementAt(i)).doubleValue(),2)
874                    +")"+(m_metricType == CONVICTION ? ">" : ""));
875            }
876            text.append('\n');
877        }
878    }
879    else{
880        for (int i = 0; i < m_Ls.size(); i++) {
881            text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
882                  ((FastVector)m_Ls.elementAt(i)).size()+'\n');
883            if (m_outputItemSets) {
884                text.append("\nLarge Itemsets L("+(i+1)+"):\n");
885                for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++){
886                    text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
887                      toString(m_instances)+"\n");
888                    text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).m_classLabel+"  ");
889                    text.append(((LabeledItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).support()+"\n");
890                }
891            }
892        }
893        text.append("\nBest rules found:\n\n");
894        for (int i = 0; i < m_allTheRules[0].size(); i++) {
895            text.append(Utils.doubleToString((double)i+1, 
896                                             (int)(Math.log(m_numRules)/Math.log(10)+1),0)+
897                        ". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
898                        toString(m_instances) 
899                        + " ==> " + ((ItemSet)m_allTheRules[1].elementAt(i)).
900                        toString(m_onlyClass) +"    conf:("+ 
901                        Utils.doubleToString(((Double)m_allTheRules[2].
902                                              elementAt(i)).doubleValue(),2)+")");
903       
904            text.append('\n');
905        }
906    }
907    return text.toString();
908  }
909 
910   /**
911   * Returns the metric string for the chosen metric type
912   * @return a string describing the used metric for the interestingness of a class association rule
913   */
914  public String metricString() {
915     
916        switch(m_metricType) {
917        case LIFT:
918            return "lif";
919        case LEVERAGE:
920            return "leverage"; 
921        case CONVICTION:
922            return "conviction";
923        default:
924            return "conf";
925        }
926  }
927
928  /**
929   * Returns the tip text for this property
930   * @return tip text for this property suitable for
931   * displaying in the explorer/experimenter gui
932   */
933  public String removeAllMissingColsTipText() {
934    return "Remove columns with all missing values.";
935  }
936
937  /**
938   * Remove columns containing all missing values.
939   * @param r true if cols are to be removed.
940   */
941  public void setRemoveAllMissingCols(boolean r) {
942    m_removeMissingCols = r;
943  }
944
945  /**
946   * Returns whether columns containing all missing values are to be removed
947   * @return true if columns are to be removed.
948   */
949  public boolean getRemoveAllMissingCols() {
950    return m_removeMissingCols;
951  }
952
953  /**
954   * Returns the tip text for this property
955   * @return tip text for this property suitable for
956   * displaying in the explorer/experimenter gui
957   */
958  public String upperBoundMinSupportTipText() {
959    return "Upper bound for minimum support. Start iteratively decreasing "
960      +"minimum support from this value.";
961  }
962
963  /**
964   * Get the value of upperBoundMinSupport.
965   *
966   * @return Value of upperBoundMinSupport.
967   */
968  public double getUpperBoundMinSupport() {
969   
970    return m_upperBoundMinSupport;
971  }
972 
973  /**
974   * Set the value of upperBoundMinSupport.
975   *
976   * @param v  Value to assign to upperBoundMinSupport.
977   */
978  public void setUpperBoundMinSupport(double v) {
979   
980    m_upperBoundMinSupport = v;
981  }
982
983   /**
984   * Sets the class index
985   * @param index the class index
986   */ 
987  public void setClassIndex(int index){
988     
989      m_classIndex = index;
990  }
991 
992  /**
993   * Gets the class index
994   * @return the index of the class attribute
995   */ 
996  public int getClassIndex(){
997     
998      return m_classIndex;
999  }
1000
1001  /**
1002   * Returns the tip text for this property
1003   * @return tip text for this property suitable for
1004   * displaying in the explorer/experimenter gui
1005   */
1006  public String classIndexTipText() {
1007    return "Index of the class attribute. If set to -1, the last attribute is taken as class attribute.";
1008
1009  }
1010
1011  /**
1012   * Sets class association rule mining
1013   * @param flag true if class association rules are to be mined, false otherwise
1014   */ 
1015  public void setCar(boolean flag){
1016      m_car = flag;
1017  }
1018 
1019  /**
1020   * Gets whether class association rules are mined
1021   * @return true if class association rules are mined, false otherwise
1022   */ 
1023  public boolean getCar(){
1024      return m_car;
1025  }
1026
1027  /**
1028   * Returns the tip text for this property
1029   * @return tip text for this property suitable for
1030   * displaying in the explorer/experimenter gui
1031   */
1032  public String carTipText() {
1033    return "If enabled class association rules are mined instead of (general) association rules.";
1034  }
1035
1036  /**
1037   * Returns the tip text for this property
1038   * @return tip text for this property suitable for
1039   * displaying in the explorer/experimenter gui
1040   */
1041  public String lowerBoundMinSupportTipText() {
1042    return "Lower bound for minimum support.";
1043  }
1044
1045  /**
1046   * Get the value of lowerBoundMinSupport.
1047   *
1048   * @return Value of lowerBoundMinSupport.
1049   */
1050  public double getLowerBoundMinSupport() {
1051   
1052    return m_lowerBoundMinSupport;
1053  }
1054 
1055  /**
1056   * Set the value of lowerBoundMinSupport.
1057   *
1058   * @param v  Value to assign to lowerBoundMinSupport.
1059   */
1060  public void setLowerBoundMinSupport(double v) {
1061   
1062    m_lowerBoundMinSupport = v;
1063  }
1064 
1065  /**
1066   * Get the metric type
1067   *
1068   * @return the type of metric to use for ranking rules
1069   */
1070  public SelectedTag getMetricType() {
1071    return new SelectedTag(m_metricType, TAGS_SELECTION);
1072  }
1073
1074  /**
1075   * Returns the tip text for this property
1076   * @return tip text for this property suitable for
1077   * displaying in the explorer/experimenter gui
1078   */
1079  public String metricTypeTipText() {
1080    return "Set the type of metric by which to rank rules. Confidence is "
1081      +"the proportion of the examples covered by the premise that are also "
1082      +"covered by the consequence(Class association rules can only be mined using confidence). Lift is confidence divided by the "
1083      +"proportion of all examples that are covered by the consequence. This "
1084      +"is a measure of the importance of the association that is independent "
1085      +"of support. Leverage is the proportion of additional examples covered "
1086      +"by both the premise and consequence above those expected if the "
1087      +"premise and consequence were independent of each other. The total "
1088      +"number of examples that this represents is presented in brackets "
1089      +"following the leverage. Conviction is "
1090      +"another measure of departure from independence. Conviction is given "
1091      +"by ";
1092  }
1093
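  // For reference, the metrics described above, for a rule premise ==> consequence
  // (probabilities estimated as relative frequencies in the data):
  //   confidence = P(consequence | premise)
  //   lift       = confidence / P(consequence)
  //   leverage   = P(premise, consequence) - P(premise) * P(consequence)
  //   conviction = P(premise) * P(!consequence) / P(premise, !consequence)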
1094  /**
1095   * Set the metric type for ranking rules
1096   *
1097   * @param d the type of metric
1098   */
1099  public void setMetricType (SelectedTag d) {
1100   
1101    if (d.getTags() == TAGS_SELECTION) {
1102      m_metricType = d.getSelectedTag().getID();
1103    }
1104
1105    if (m_significanceLevel != -1 && m_metricType != CONFIDENCE) {
1106      m_metricType = CONFIDENCE;
1107    }
1108
1109    if (m_metricType == CONFIDENCE) {
1110      setMinMetric(0.9);
1111    }
1112
1113    if (m_metricType == LIFT || m_metricType == CONVICTION) {
1114      setMinMetric(1.1);
1115    }
1116 
1117    if (m_metricType == LEVERAGE) {
1118      setMinMetric(0.1);
1119    }
1120  }
1121
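  // Illustrative sketch (not part of the original source): selecting a
  // metric programmatically. Note that this also resets the minimum metric
  // score to the per-metric default shown above.
  //
  //   Apriori apriori = new Apriori();
  //   // 1 = lift (see the metric type constants above); TAGS_SELECTION is public
  //   apriori.setMetricType(new SelectedTag(1, Apriori.TAGS_SELECTION));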
1122  /**
1123   * Returns the tip text for this property
1124   * @return tip text for this property suitable for
1125   * displaying in the explorer/experimenter gui
1126   */
1127  public String minMetricTipText() {
1128    return "Minimum metric score. Consider only rules with scores higher than "
1129      +"this value.";
1130  }
1131
1132  /**
1133   * Get the value of minConfidence.
1134   *
1135   * @return Value of minConfidence.
1136   */
1137  public double getMinMetric() {
1138   
1139    return m_minMetric;
1140  }
1141 
1142  /**
1143   * Set the value of minConfidence.
1144   *
1145   * @param v  Value to assign to minConfidence.
1146   */
1147  public void setMinMetric(double v) {
1148   
1149    m_minMetric = v;
1150  }
1151
1152  /**
1153   * Returns the tip text for this property
1154   * @return tip text for this property suitable for
1155   * displaying in the explorer/experimenter gui
1156   */
1157  public String numRulesTipText() {
1158    return "Number of rules to find.";
1159  }
1160
1161  /**
1162   * Get the value of numRules.
1163   *
1164   * @return Value of numRules.
1165   */
1166  public int getNumRules() {
1167   
1168    return m_numRules;
1169  }
1170 
1171  /**
1172   * Set the value of numRules.
1173   *
1174   * @param v  Value to assign to numRules.
1175   */
1176  public void setNumRules(int v) {
1177   
1178    m_numRules = v;
1179  }
1180
1181  /**
1182   * Returns the tip text for this property
1183   * @return tip text for this property suitable for
1184   * displaying in the explorer/experimenter gui
1185   */
1186  public String deltaTipText() {
1187    return "Iteratively decrease support by this factor. Reduces support "
1188      +"until min support is reached or required number of rules has been "
1189      +"generated.";
1190  }
1191   
1192  /**
1193   * Get the value of delta.
1194   *
1195   * @return Value of delta.
1196   */
1197  public double getDelta() {
1198   
1199    return m_delta;
1200  }
1201 
1202  /**
1203   * Set the value of delta.
1204   *
1205   * @param v  Value to assign to delta.
1206   */
1207  public void setDelta(double v) {
1208   
1209    m_delta = v;
1210  }
1211
1212  /**
1213   * Returns the tip text for this property
1214   * @return tip text for this property suitable for
1215   * displaying in the explorer/experimenter gui
1216   */
1217  public String significanceLevelTipText() {
1218    return "Significance level. Significance test (confidence metric only).";
1219  }
1220
1221  /**
1222   * Get the value of significanceLevel.
1223   *
1224   * @return Value of significanceLevel.
1225   */
1226  public double getSignificanceLevel() {
1227   
1228    return m_significanceLevel;
1229  }
1230 
1231  /**
1232   * Set the value of significanceLevel.
1233   *
1234   * @param v  Value to assign to significanceLevel.
1235   */
1236  public void setSignificanceLevel(double v) {
1237   
1238    m_significanceLevel = v;
1239  }
1240
1241  /**
1242   * Sets whether itemsets are output as well
1243   * @param flag true if itemsets are to be output as well
1244   */ 
1245  public void setOutputItemSets(boolean flag){
1246    m_outputItemSets = flag;
1247  }
1248 
1249  /**
1250   * Gets whether itemsets are output as well
1251   * @return true if itemsets are output as well
1252   */ 
1253  public boolean getOutputItemSets(){
1254    return m_outputItemSets;
1255  }
1256
1257  /**
1258   * Returns the tip text for this property
1259   * @return tip text for this property suitable for
1260   * displaying in the explorer/experimenter gui
1261   */
1262  public String outputItemSetsTipText() {
1263    return "If enabled the itemsets are output as well.";
1264  }
1265
1266  /**
1267   * Sets verbose mode
1268   * @param flag true if algorithm should be run in verbose mode
1269   */ 
1270  public void setVerbose(boolean flag){
1271    m_verbose = flag;
1272  }
1273 
1274  /**
1275   * Gets whether algorithm is run in verbose mode
1276   * @return true if algorithm is run in verbose mode
1277   */ 
1278  public boolean getVerbose(){
1279    return m_verbose;
1280  }
1281
1282  /**
1283   * Returns the tip text for this property
1284   * @return tip text for this property suitable for
1285   * displaying in the explorer/experimenter gui
1286   */
1287  public String verboseTipText() {
1288    return "If enabled the algorithm will be run in verbose mode.";
1289  }
1290 
1291  /**
1292   * Returns the tip text for this property
1293   * @return tip text for this property suitable for
1294   * displaying in the explorer/experimenter gui
1295   */
1296  public String treatZeroAsMissingTipText() {
1297    return "If enabled, zero (that is, the first value of a nominal) is "
1298    + "treated in the same way as a missing value.";
1299  }
1300 
1301  /**
1302   * Sets whether zeros (i.e. the first value of a nominal attribute)
1303   * should be treated as missing values.
1304   *
1305   * @param z true if zeros should be treated as missing values.
1306   */
1307  public void setTreatZeroAsMissing(boolean z) {
1308    m_treatZeroAsMissing = z;
1309  }
1310 
1311  /**
1312   * Gets whether zeros (i.e. the first value of a nominal attribute)
1313   * are to be treated in the same way as missing values.
1314   *
1315   * @return true if zeros are to be treated like missing values.
1316   */
1317  public boolean getTreatZeroAsMissing() {
1318    return m_treatZeroAsMissing;
1319  }
1320
1321  /**
1322   * Method that finds all large itemsets for the given set of instances.
1323   *
1324   * @throws Exception if an attribute is numeric
1325   */
1326  private void findLargeItemSets() throws Exception {
1327   
1328    FastVector kMinusOneSets, kSets;
1329    Hashtable hashtable;
1330    int necSupport, necMaxSupport,i = 0;
1331   
1332   
1333   
1334    // Find large itemsets
1335
1336    // minimum support
1337    necSupport = (int)(m_minSupport * (double)m_instances.numInstances()+0.5);
1338    necMaxSupport = (int)(m_upperBoundMinSupport * (double)m_instances.numInstances()+0.5);
1339   
1340    kSets = AprioriItemSet.singletons(m_instances, m_treatZeroAsMissing);
1341    AprioriItemSet.upDateCounters(kSets,m_instances);
1342    kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1343    if (kSets.size() == 0)
1344      return;
1345    do {
1346      m_Ls.addElement(kSets);
1347      kMinusOneSets = kSets;
1348      kSets = AprioriItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
1349      hashtable = AprioriItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
1350      m_hashtables.addElement(hashtable);
1351      kSets = AprioriItemSet.pruneItemSets(kSets, hashtable);
1352      AprioriItemSet.upDateCounters(kSets, m_instances);
1353      kSets = AprioriItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1354      i++;
1355    } while (kSets.size() > 0);
1356  } 
1357
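  // Worked example of the thresholds above: with m_minSupport = 0.15 and
  // 100 instances, necSupport = (int)(0.15 * 100 + 0.5) = 15, so an itemset
  // must occur in at least 15 transactions to survive deleteItemSets.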
1358  /**
1359   * Method that finds all association rules and performs significance test.
1360   *
1361   * @throws Exception if an attribute is numeric
1362   */
1363  private void findRulesBruteForce() throws Exception {
1364
1365    FastVector[] rules;
1366
1367    // Build rules
1368    for (int j = 1; j < m_Ls.size(); j++) {
1369      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
1370      Enumeration enumItemSets = currentItemSets.elements();
1371      while (enumItemSets.hasMoreElements()) {
1372        AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
1373        //AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
1374        rules=currentItemSet.generateRulesBruteForce(m_minMetric,m_metricType,
1375                                  m_hashtables,j+1,
1376                                  m_instances.numInstances(),
1377                                  m_significanceLevel);
1378        for (int k = 0; k < rules[0].size(); k++) {
1379          m_allTheRules[0].addElement(rules[0].elementAt(k));
1380          m_allTheRules[1].addElement(rules[1].elementAt(k));
1381          m_allTheRules[2].addElement(rules[2].elementAt(k));
1382
1383          m_allTheRules[3].addElement(rules[3].elementAt(k));
1384          m_allTheRules[4].addElement(rules[4].elementAt(k));
1385          m_allTheRules[5].addElement(rules[5].elementAt(k));
1386        }
1387      }
1388    }
1389  }
1390
1391  /**
1392   * Method that finds all association rules.
1393   *
1394   * @throws Exception if an attribute is numeric
1395   */
1396  private void findRulesQuickly() throws Exception {
1397
1398    FastVector[] rules;
1399
1400    // Build rules
1401    for (int j = 1; j < m_Ls.size(); j++) {
1402      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
1403      Enumeration enumItemSets = currentItemSets.elements();
1404      while (enumItemSets.hasMoreElements()) {
1405        AprioriItemSet currentItemSet = (AprioriItemSet)enumItemSets.nextElement();
1406        //AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
1407        rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
1408        for (int k = 0; k < rules[0].size(); k++) {
1409          m_allTheRules[0].addElement(rules[0].elementAt(k));
1410          m_allTheRules[1].addElement(rules[1].elementAt(k));
1411          m_allTheRules[2].addElement(rules[2].elementAt(k));
1412        }
1413      }
1414    }
1415  }
1416 
1417      /**
1418     *
1419     * Method that finds all large itemsets for class association rules for the given set of instances.
1420     * @throws Exception if an attribute is numeric
1421     */
1422    private void findLargeCarItemSets() throws Exception {
1423       
1424        FastVector kMinusOneSets, kSets;
1425        Hashtable hashtable;
1426        int necSupport, necMaxSupport,i = 0;
1427       
1428        // Find large itemsets
1429       
1430        // minimum support
1431        double nextMinSupport = m_minSupport*(double)m_instances.numInstances();
1432        double nextMaxSupport = m_upperBoundMinSupport*(double)m_instances.numInstances();
1433        if((double)Math.rint(nextMinSupport) == nextMinSupport){
1434            necSupport = (int) nextMinSupport;
1435        }
1436        else{
1437            necSupport = Math.round((float)(nextMinSupport+0.5));
1438        }
1439        if((double)Math.rint(nextMaxSupport) == nextMaxSupport){
1440            necMaxSupport = (int) nextMaxSupport;
1441        }
1442        else{
1443            necMaxSupport = Math.round((float)(nextMaxSupport+0.5));
1444        }
1445       
1446        //find item sets of length one
1447        kSets = LabeledItemSet.singletons(m_instances,m_onlyClass);
1448        LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
1449       
1450        //check if an item set of length one is frequent; if not, delete it
1451        kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1452        if (kSets.size() == 0)
1453            return;
1454        do {
1455            m_Ls.addElement(kSets);
1456            kMinusOneSets = kSets;
1457            kSets = LabeledItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
1458            hashtable = LabeledItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
1459            kSets = LabeledItemSet.pruneItemSets(kSets, hashtable);
1460            LabeledItemSet.upDateCounters(kSets, m_instances,m_onlyClass);
1461            kSets = LabeledItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
1462            i++;
1463        } while (kSets.size() > 0);
1464    } 
1465
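  // Worked example of the rounding above: with m_minSupport = 0.15 and 101
  // instances, nextMinSupport = 15.15 is not integral, so
  // necSupport = Math.round(15.15 + 0.5) = 16, i.e. fractional support
  // counts are rounded up for class association rules.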
1466   
1467
1468  /**
1469   * Method that finds all class association rules.
1470   *
1471   * @throws Exception if an attribute is numeric
1472   */
1473   private void findCarRulesQuickly() throws Exception {
1474
1475    FastVector[] rules;
1476
1477    // Build rules
1478    for (int j = 0; j < m_Ls.size(); j++) {
1479      FastVector currentLabeledItemSets = (FastVector)m_Ls.elementAt(j);
1480      Enumeration enumLabeledItemSets = currentLabeledItemSets.elements();
1481      while (enumLabeledItemSets.hasMoreElements()) {
1482        LabeledItemSet currentLabeledItemSet = (LabeledItemSet)enumLabeledItemSets.nextElement();
1483        rules = currentLabeledItemSet.generateRules(m_minMetric,false);
1484        for (int k = 0; k < rules[0].size(); k++) {
1485          m_allTheRules[0].addElement(rules[0].elementAt(k));
1486          m_allTheRules[1].addElement(rules[1].elementAt(k));
1487          m_allTheRules[2].addElement(rules[2].elementAt(k));
1488        }
1489      }
1490    }
1491  }
1492
1493  /**
1494   * returns all the rules
1495   *
1496   * @return            all the rules
1497   * @see               #m_allTheRules
1498   */
1499  public FastVector[] getAllTheRules() {
1500    return m_allTheRules;
1501  }
1502 
1503  /**
1504   * Returns the revision string.
1505   *
1506   * @return            the revision
1507   */
1508  public String getRevision() {
1509    return RevisionUtils.extract("$Revision: 5698 $");
1510  }
1511
1512  /**
1513   * Main method.
1514   *
1515   * @param args the commandline options
1516   */
1517  public static void main(String[] args) {
1518    runAssociator(new Apriori(), args);
1519  }
1520}
1521