source: src/main/java/weka/associations/CaRuleGeneration.java @ 10

Last change on this file since 10 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 8.8 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    CaRuleGeneration.java
19 *    Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.associations;
24
25import weka.core.Attribute;
26import weka.core.FastVector;
27import weka.core.Instances;
28import weka.core.RevisionHandler;
29import weka.core.RevisionUtils;
30import weka.core.UnassignedClassException;
31
32import java.io.Serializable;
33import java.util.Hashtable;
34import java.util.TreeSet;
35
36
37/**
38 * Class implementing the rule generation procedure of the predictive apriori algorithm for class association rules.
39 *
40 * For association rules in gerneral the method is described in:
41 * T. Scheffer (2001). <i>Finding Association Rules That Trade Support
42 * Optimally against Confidence</i>. Proc of the 5th European Conf.
43 * on Principles and Practice of Knowledge Discovery in Databases (PKDD'01),
44 * pp. 424-435. Freiburg, Germany: Springer-Verlag. <p>
45 *
46 * The implementation follows the paper expect for adding a rule to the output of the
47 * <i>n</i> best rules. A rule is added if:
48 * the expected predictive accuracy of this rule is among the <i>n</i> best and it is
49 * not subsumed by a rule with at least the same expected predictive accuracy
50 * (out of an unpublished manuscript from T. Scheffer).
51 *
52 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
53 * @version $Revision: 1.4 $ */
54public class CaRuleGeneration
55  extends RuleGeneration
56  implements Serializable, RevisionHandler {
57
58  /** for serialization */
59  private static final long serialVersionUID = 3065752149646517703L;
60
61  /**
62   * Constructor
63   * @param itemSet the item set that forms the premise of the rule
64   */
65  public CaRuleGeneration(ItemSet itemSet){
66    super(itemSet); 
67  }
68
69  /**
70   * Generates all rules for an item set. The item set is the premise.
71   * @param numRules the number of association rules the use wants to mine.
72   * This number equals the size <i>n</i> of the list of the
73   * best rules.
74   * @param midPoints the mid points of the intervals
75   * @param priors Hashtable that contains the prior probabilities
76   * @param expectation the minimum value of the expected predictive accuracy
77   * that is needed to get into the list of the best rules
78   * @param instances the instances for which association rules are generated
79   * @param best the list of the <i>n</i> best rules.
80   * The list is implemented as a TreeSet
81   * @param genTime the maximum time of generation
82   * @return all the rules with minimum confidence for the given item set
83   */
84  public TreeSet generateRules(int numRules, double[] midPoints, Hashtable priors, double expectation, Instances instances, TreeSet best, int genTime) {
85
86    boolean redundant = false;
87    FastVector consequences = new FastVector();
88    ItemSet premise;
89    RuleItem current = null, old = null;
90
91    Hashtable hashtable;
92
93    m_change = false;
94    m_midPoints = midPoints;
95    m_priors = priors;
96    m_best = best;
97    m_expectation = expectation;
98    m_count = genTime;
99    m_instances = instances;
100
101    //create rule body
102    premise =null;
103    premise = new ItemSet(m_totalTransactions);
104    int[] premiseItems = new int[m_items.length];
105    System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
106    premise.setItem(premiseItems);
107    premise.setCounter(m_counter);
108
109    consequences = singleConsequence(instances);
110
111    //create n best rules
112    do{
113      if(premise == null || consequences.size() == 0)
114        return m_best;
115      m_minRuleCount = 1;
116      while(expectation((double)m_minRuleCount,premise.counter(),m_midPoints,m_priors) <= m_expectation){
117        m_minRuleCount++;
118        if(m_minRuleCount > premise.counter())
119          return m_best;
120      }
121      redundant = false;
122
123      //create possible heads 
124      FastVector allRuleItems = new FastVector();
125      int h = 0;
126      while(h < consequences.size()){
127        RuleItem dummie = new RuleItem();
128        m_count++;
129        current = dummie.generateRuleItem(premise,(ItemSet)consequences.elementAt(h),instances,m_count,m_minRuleCount,m_midPoints,m_priors);
130        if(current != null)
131          allRuleItems.addElement(current);
132        h++;
133      }
134
135      //update best
136      for(h =0; h< allRuleItems.size();h++){
137        current = (RuleItem)allRuleItems.elementAt(h);
138        if(m_best.size() < numRules){
139          m_change =true;
140          redundant = removeRedundant(current); 
141        }
142        else{
143          m_expectation = ((RuleItem)(m_best.first())).accuracy();
144          if(current.accuracy() > m_expectation){
145            boolean remove = m_best.remove(m_best.first());
146            m_change = true;
147            redundant = removeRedundant(current);
148            m_expectation = ((RuleItem)(m_best.first())).accuracy();
149            while(expectation((double)m_minRuleCount, (current.premise()).counter(),m_midPoints,m_priors) < m_expectation){
150              m_minRuleCount++;
151              if(m_minRuleCount > (current.premise()).counter())
152                break;
153            } 
154          } 
155        }   
156      }   
157    }while(redundant); 
158    return m_best;
159  }
160
161  /**
162   * Methods that decides whether or not rule a subsumes rule b.
163   * The defintion of subsumption is:
164   * Rule a subsumes rule b, if a subsumes b
165   * AND
166   * a has got least the same expected predictive accuracy as b.
167   * @param a an association rule stored as a RuleItem
168   * @param b an association rule stored as a RuleItem
169   * @return true if rule a subsumes rule b or false otherwise.
170   */ 
171  public static boolean aSubsumesB(RuleItem a, RuleItem b){
172
173    if(!a.consequence().equals(b.consequence()))
174      return false;
175    if(a.accuracy() < b.accuracy())
176      return false;
177    for(int k = 0; k < ((a.premise()).items()).length;k++){
178      if((a.premise()).itemAt(k) != (b.premise()).itemAt(k)){
179        if(((a.premise()).itemAt(k) != -1 && (b.premise()).itemAt(k) != -1) || (b.premise()).itemAt(k) == -1)
180          return false;
181      }
182      /*if(a.m_consequence.m_items[k] != b.m_consequence.m_items[k]){
183            if((a.m_consequence.m_items[k] != -1 && b.m_consequence.m_items[k] != -1) || a.m_consequence.m_items[k] == -1)
184                return false;
185        }*/
186    }
187    return true;
188
189  }
190
191  /**
192   * Converts the header info of the given set of instances into a set
193   * of item sets (singletons). The ordering of values in the header file
194   * determines the lexicographic order.
195   *
196   * @param instances the set of instances whose header info is to be used
197   * @return a set of item sets, each containing a single item
198   * @exception Exception if singletons can't be generated successfully
199   */
200  public static FastVector singletons(Instances instances) throws Exception {
201
202    FastVector setOfItemSets = new FastVector();
203    ItemSet current;
204
205    if(instances.classIndex() == -1)
206      throw new UnassignedClassException("Class index is negative (not set)!");
207    Attribute att = instances.classAttribute();
208    for (int i = 0; i < instances.numAttributes(); i++) {
209      if (instances.attribute(i).isNumeric())
210        throw new Exception("Can't handle numeric attributes!");
211      if(i != instances.classIndex()){
212        for (int j = 0; j < instances.attribute(i).numValues(); j++) {
213          current = new ItemSet(instances.numInstances());
214          int[] currentItems = new int[instances.numAttributes()];
215          for (int k = 0; k < instances.numAttributes(); k++)
216            currentItems[k] = -1;
217          currentItems[i] = j;
218          current.setItem(currentItems);
219          setOfItemSets.addElement(current);
220        }
221      }
222    }
223    return setOfItemSets;
224  }
225
226  /**
227   * generates a consequence of length 1 for a class association rule.
228   * @param instances the instances under consideration
229   * @return FastVector with consequences of length 1
230   */ 
231  public static FastVector singleConsequence(Instances instances){
232
233    ItemSet consequence;
234    FastVector consequences = new FastVector();
235
236    for (int j = 0; j < (instances.classAttribute()).numValues(); j++) {
237      consequence = new ItemSet(instances.numInstances());
238      int[] consequenceItems = new int[instances.numAttributes()];
239      consequence.setItem(consequenceItems);
240      for (int k = 0; k < instances.numAttributes(); k++) 
241        consequence.setItemAt(-1,k);
242      consequence.setItemAt(j,instances.classIndex());
243      consequences.addElement(consequence);
244    }
245    return consequences;
246
247  }
248
249  /**
250   * Returns the revision string.
251   *
252   * @return            the revision
253   */
254  public String getRevision() {
255    return RevisionUtils.extract("$Revision: 1.4 $");
256  }
257}
Note: See TracBrowser for help on using the repository browser.