source: src/main/java/weka/associations/LabeledItemSet.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 12.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * LabeledItemSet.java
19 * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.associations;
24
25import weka.core.FastVector;
26import weka.core.Instance;
27import weka.core.Instances;
28import weka.core.RevisionHandler;
29import weka.core.RevisionUtils;
30
31import java.io.Serializable;
32import java.util.Enumeration;
33import java.util.Hashtable;
34
35
36/**
37 * Class for storing a set of items together with a  class label. Item sets are stored in a lexicographic
38 * order, which is determined by the header information of the set of instances
39 * used for generating the set of items. All methods in this class assume that
40 * item sets are stored in lexicographic order.
41 * The class provides the methods used for item sets in class association rule mining.
42 * Because every item set knows its class label the training set can be splitted up virtually.
43 *
44 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
45 * @version $Revision: 1.5 $
46 */
47
48public class LabeledItemSet
49  extends ItemSet
50  implements Serializable, RevisionHandler {
51
52  /** for serialization */
53  private static final long serialVersionUID = 4158771925518299903L;
54
55    /** The class label. */
56    protected int m_classLabel;
57
58     /** The support of the rule. */
59    protected int m_ruleSupCounter;   
60 
61    /**
62     * Constructor
63     *
64     * @param totalTrans the total number of transactions
65     * @param classLabel the class lebel
66     */   
67    public LabeledItemSet(int totalTrans, int classLabel){
68       
69        super(totalTrans);
70        m_classLabel = classLabel;
71    }
72
73   
74
75    /**
76     * Deletes all item sets that don't have minimum support and have more than maximum support
77     * @return the reduced set of item sets
78     * @param maxSupport the maximum support
79     * @param itemSets the set of item sets to be pruned
80     * @param minSupport the minimum number of transactions to be covered
81     */
82  public static FastVector deleteItemSets(FastVector itemSets, 
83                                          int minSupport,
84                                          int maxSupport) {
85
86    FastVector newVector = new FastVector(itemSets.size());
87
88    for (int i = 0; i < itemSets.size(); i++) {
89      LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
90      if ((current.m_ruleSupCounter >= minSupport) 
91          && (current.m_ruleSupCounter <= maxSupport))
92            newVector.addElement(current);
93    }
94    return newVector;
95  }
96
97    /**
98   * Tests if two item sets are equal.
99   *
100   * @param itemSet another item set
101   * @return true if this item set contains the same items as the given one
102   */
103  public final boolean equals(Object itemSet) {
104
105    if (!(this.equalCondset(itemSet)))
106      return false;
107    if(m_classLabel != ((LabeledItemSet)itemSet).m_classLabel)
108        return false;
109   
110    return true;
111  }
112
113  /**
114   * Compares two item sets
115   * @param itemSet an item set
116   * @return true if the the item sets are equal, false otherwise
117   */ 
118    public final boolean equalCondset(Object itemSet) {
119
120    if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
121      return false;
122    }
123    if (m_items.length != ((ItemSet)itemSet).items().length)
124      return false;
125    for (int i = 0; i < m_items.length; i++)
126      if (m_items[i] != ((ItemSet)itemSet).itemAt(i))
127        return false;
128    return true;
129  }
130
131    /**
132   * Return a hashtable filled with the given item sets.
133   *
134   * @param itemSets the set of item sets to be used for filling the hash table
135   * @param initialSize the initial size of the hashtable
136   * @return the generated hashtable
137   */
138  public static Hashtable getHashtable(FastVector itemSets, int initialSize) {
139
140    Hashtable hashtable = new Hashtable(initialSize);
141    for (int i = 0; i < itemSets.size(); i++) {
142      LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
143      hashtable.put(current, new Integer(current.m_classLabel));
144    }
145   
146    return hashtable;
147  }
148
149
150
151    /**
152     * Merges all item sets in the set of (k-1)-item sets
153     * to create the (k)-item sets and updates the counters.
154     * @return the generated (k)-item sets
155     * @param totalTrans the total number of transactions
156     * @param itemSets the set of (k-1)-item sets
157     * @param size the value of (k-1)
158     */
159  public static FastVector mergeAllItemSets(FastVector itemSets, int size, 
160                                            int totalTrans) {
161     
162      FastVector newVector = new FastVector();
163      LabeledItemSet result;
164      int numFound, k;
165     
166      for (int i = 0; i < itemSets.size(); i++) {
167          LabeledItemSet first = (LabeledItemSet)itemSets.elementAt(i);
168      out:
169          for (int j = i+1; j < itemSets.size(); j++) {
170              LabeledItemSet second = (LabeledItemSet)itemSets.elementAt(j);
171              while(first.m_classLabel != second.m_classLabel){
172                  j++;
173                  if(j == itemSets.size())
174                      break out;
175                  second = (LabeledItemSet)itemSets.elementAt(j);
176              }
177              result = new LabeledItemSet(totalTrans,first.m_classLabel);
178              result.m_items = new int[first.m_items.length];
179             
180              // Find and copy common prefix of size 'size'
181              numFound = 0;
182              k = 0;
183              while (numFound < size) {
184                  if (first.m_items[k] == second.m_items[k]) {
185                      if (first.m_items[k] != -1) 
186                          numFound++;
187                      result.m_items[k] = first.m_items[k];
188                  } else 
189                      break out;
190                  k++;
191              }
192             
193              // Check difference
194              while (k < first.m_items.length) {
195                  if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
196                      break;
197                  else {
198                      if (first.m_items[k] != -1)
199                          result.m_items[k] = first.m_items[k];
200                      else
201                          result.m_items[k] = second.m_items[k];
202                  }
203                  k++;
204              }
205              if (k == first.m_items.length) {
206                  result.m_ruleSupCounter = 0;
207                  result.m_counter = 0;
208                  newVector.addElement(result);
209              }
210          }
211      }
212     
213    return newVector;
214  }
215   
216  /**
217   * Splits the class attribute away. Depending on the invert flag, the instances without class attribute or only the class attribute of all instances is returned
218   * @param instances the instances
219   * @param invert flag; if true only the class attribute remains, otherweise the class attribute is the only attribute that is deleted.
220   * @throws Exception exception if instances cannot be splitted
221   * @return Instances without the class attribute or instances with only the class attribute
222   */ 
223    public static Instances divide(Instances instances, boolean invert) throws Exception{
224   
225        Instances newInstances = new Instances(instances);
226        if(instances.classIndex() < 0)
227            throw new Exception("For class association rule mining a class attribute has to be specified.");
228        if(invert){
229                for(int i=0;i<newInstances.numAttributes();i++){
230                    if(i!=newInstances.classIndex()){
231                        newInstances.deleteAttributeAt(i);
232                        i--;
233                    }
234                }
235            return newInstances;
236        }
237        else{
238            newInstances.setClassIndex(-1);
239            newInstances.deleteAttributeAt(instances.classIndex());
240            return newInstances;
241        }
242    }
243
244   
245   
246     /**
247      * Converts the header info of the given set of instances into a set
248      * of item sets (singletons). The ordering of values in the header file
249      * determines the lexicographic order. Each item set knows its class label.
250      * @return a set of item sets, each containing a single item
251      * @param instancesNoClass instances without the class attribute
252      * @param classes the values of the class attribute sorted according to instances
253      * @exception Exception if singletons can't be generated successfully
254      */
255    public static FastVector singletons(Instances instancesNoClass, Instances classes) throws Exception {
256
257    FastVector cSet, setOfItemSets = new FastVector();
258    LabeledItemSet current;
259   
260   
261    //make singletons
262    for (int i = 0; i < instancesNoClass.numAttributes(); i++) {
263      if (instancesNoClass.attribute(i).isNumeric())
264          throw new Exception("Can't handle numeric attributes!");
265      for (int j = 0; j < instancesNoClass.attribute(i).numValues(); j++){
266          for(int k =0; k < (classes.attribute(0)).numValues(); k++){
267              current = new LabeledItemSet(instancesNoClass.numInstances(),k);
268              current.m_items = new int[instancesNoClass.numAttributes()];
269              for (int l = 0; l < instancesNoClass.numAttributes(); l++)
270                  current.m_items[l] = -1;
271              current.m_items[i] = j;
272              setOfItemSets.addElement(current);
273          }     
274      }
275    }
276    return setOfItemSets;
277  }
278   
279 
280   
281 
282    /**
283   * Prunes a set of (k)-item sets using the given (k-1)-item sets.
284   *
285   * @param toPrune the set of (k)-item sets to be pruned
286   * @param kMinusOne the (k-1)-item sets to be used for pruning
287   * @return the pruned set of item sets
288   */
289  public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne){
290
291    FastVector newVector = new FastVector(toPrune.size());
292    int help, j;
293
294   
295    for (int i = 0; i < toPrune.size(); i++) {
296      LabeledItemSet current = (LabeledItemSet)toPrune.elementAt(i);
297     
298      for (j = 0; j < current.m_items.length; j++){
299              if (current.m_items[j] != -1) {
300                  help = current.m_items[j];
301                  current.m_items[j] = -1;
302                  if(kMinusOne.get(current) != null && (current.m_classLabel == (((Integer)kMinusOne.get(current)).intValue())))
303                      current.m_items[j] = help;
304                  else{
305                      current.m_items[j] = help;
306                      break;
307                  } 
308          }
309      }
310      if (j == current.m_items.length) 
311        newVector.addElement(current);
312    }
313    return newVector;
314  }
315
316   
317  /**
318   * Outputs the support for an item set.
319   *
320   * @return the support
321   */
322  public final int support() {
323
324    return m_ruleSupCounter;
325  }
326
327 
328 
329     /**
330      * Updates counter of item set with respect to given transaction.
331      * @param instanceNoClass instances without the class attribute
332      * @param instanceClass the values of the class attribute sorted according to instances
333      */
334  public final void upDateCounter(Instance instanceNoClass, Instance instanceClass) {
335
336     
337      if (containedBy(instanceNoClass)){
338          m_counter++;
339          if(this.m_classLabel == instanceClass.value(0))
340              m_ruleSupCounter++;
341      }
342  }
343
344  /**
345   * Updates counter of a specific item set
346   * @param itemSets an item sets
347   * @param instancesNoClass instances without the class attribute
348   * @param instancesClass the values of the class attribute sorted according to instances
349   */ 
350   public static void upDateCounters(FastVector itemSets, Instances instancesNoClass, Instances instancesClass){
351
352    for (int i = 0; i < instancesNoClass.numInstances(); i++) {
353        Enumeration enu = itemSets.elements();
354        while (enu.hasMoreElements())
355            ((LabeledItemSet)enu.nextElement()).upDateCounter(instancesNoClass.instance(i),instancesClass.instance(i));
356    }
357
358  }
359
360   /**
361    * Generates rules out of item sets
362    * @param minConfidence the minimum confidence
363    * @param noPrune flag indicating whether the rules are pruned accoridng to the minimum confidence value
364    * @return a set of rules
365    */   
366    public final FastVector[] generateRules(double minConfidence, boolean noPrune) {
367
368        FastVector premises = new FastVector(),consequences = new FastVector(),
369            conf = new FastVector();
370        FastVector[] rules = new FastVector[3];
371        ItemSet premise, consequence;
372       
373    // Generate all rules with class in the consequence.
374        premise = new ItemSet(m_totalTransactions);
375        consequence = new ItemSet(m_totalTransactions);
376        int[] premiseItems = new int[m_items.length];
377        int[] consequenceItems = new int[1];
378        System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
379        consequence.setItem(consequenceItems);
380        premise.setItem(premiseItems);
381        consequence.setItemAt(m_classLabel,0);
382        consequence.setCounter(this.m_ruleSupCounter);
383        premise.setCounter(this.m_counter);
384        premises.addElement(premise);
385        consequences.addElement(consequence);
386        conf.addElement(new Double((double)this.m_ruleSupCounter/(double)this.m_counter));
387           
388        rules[0] = premises;
389        rules[1] = consequences;
390        rules[2] = conf;
391        if(!noPrune)
392            pruneRules(rules, minConfidence);
393       
394   
395        return rules;
396    }
397   
398    /**
399     * Returns the revision string.
400     *
401     * @return          the revision
402     */
403    public String getRevision() {
404      return RevisionUtils.extract("$Revision: 1.5 $");
405    }
406}
Note: See TracBrowser for help on using the repository browser.