/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * LabeledItemSet.java * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand * */ package weka.associations; import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; import weka.core.RevisionHandler; import weka.core.RevisionUtils; import java.io.Serializable; import java.util.Enumeration; import java.util.Hashtable; /** * Class for storing a set of items together with a class label. Item sets are stored in a lexicographic * order, which is determined by the header information of the set of instances * used for generating the set of items. All methods in this class assume that * item sets are stored in lexicographic order. * The class provides the methods used for item sets in class association rule mining. * Because every item set knows its class label the training set can be splitted up virtually. * * @author Stefan Mutter (mutter@cs.waikato.ac.nz) * @version $Revision: 1.5 $ */ public class LabeledItemSet extends ItemSet implements Serializable, RevisionHandler { /** for serialization */ private static final long serialVersionUID = 4158771925518299903L; /** The class label. */ protected int m_classLabel; /** The support of the rule. */ protected int m_ruleSupCounter; /** * Constructor * * @param totalTrans the total number of transactions * @param classLabel the class lebel */ public LabeledItemSet(int totalTrans, int classLabel){ super(totalTrans); m_classLabel = classLabel; } /** * Deletes all item sets that don't have minimum support and have more than maximum support * @return the reduced set of item sets * @param maxSupport the maximum support * @param itemSets the set of item sets to be pruned * @param minSupport the minimum number of transactions to be covered */ public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) { FastVector newVector = new FastVector(itemSets.size()); for (int i = 0; i < itemSets.size(); i++) { LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i); if ((current.m_ruleSupCounter >= minSupport) && (current.m_ruleSupCounter <= maxSupport)) newVector.addElement(current); } return newVector; } /** * Tests if two item sets are equal. * * @param itemSet another item set * @return true if this item set contains the same items as the given one */ public final boolean equals(Object itemSet) { if (!(this.equalCondset(itemSet))) return false; if(m_classLabel != ((LabeledItemSet)itemSet).m_classLabel) return false; return true; } /** * Compares two item sets * @param itemSet an item set * @return true if the the item sets are equal, false otherwise */ public final boolean equalCondset(Object itemSet) { if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) { return false; } if (m_items.length != ((ItemSet)itemSet).items().length) return false; for (int i = 0; i < m_items.length; i++) if (m_items[i] != ((ItemSet)itemSet).itemAt(i)) return false; return true; } /** * Return a hashtable filled with the given item sets. * * @param itemSets the set of item sets to be used for filling the hash table * @param initialSize the initial size of the hashtable * @return the generated hashtable */ public static Hashtable getHashtable(FastVector itemSets, int initialSize) { Hashtable hashtable = new Hashtable(initialSize); for (int i = 0; i < itemSets.size(); i++) { LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i); hashtable.put(current, new Integer(current.m_classLabel)); } return hashtable; } /** * Merges all item sets in the set of (k-1)-item sets * to create the (k)-item sets and updates the counters. * @return the generated (k)-item sets * @param totalTrans the total number of transactions * @param itemSets the set of (k-1)-item sets * @param size the value of (k-1) */ public static FastVector mergeAllItemSets(FastVector itemSets, int size, int totalTrans) { FastVector newVector = new FastVector(); LabeledItemSet result; int numFound, k; for (int i = 0; i < itemSets.size(); i++) { LabeledItemSet first = (LabeledItemSet)itemSets.elementAt(i); out: for (int j = i+1; j < itemSets.size(); j++) { LabeledItemSet second = (LabeledItemSet)itemSets.elementAt(j); while(first.m_classLabel != second.m_classLabel){ j++; if(j == itemSets.size()) break out; second = (LabeledItemSet)itemSets.elementAt(j); } result = new LabeledItemSet(totalTrans,first.m_classLabel); result.m_items = new int[first.m_items.length]; // Find and copy common prefix of size 'size' numFound = 0; k = 0; while (numFound < size) { if (first.m_items[k] == second.m_items[k]) { if (first.m_items[k] != -1) numFound++; result.m_items[k] = first.m_items[k]; } else break out; k++; } // Check difference while (k < first.m_items.length) { if ((first.m_items[k] != -1) && (second.m_items[k] != -1)) break; else { if (first.m_items[k] != -1) result.m_items[k] = first.m_items[k]; else result.m_items[k] = second.m_items[k]; } k++; } if (k == first.m_items.length) { result.m_ruleSupCounter = 0; result.m_counter = 0; newVector.addElement(result); } } } return newVector; } /** * Splits the class attribute away. Depending on the invert flag, the instances without class attribute or only the class attribute of all instances is returned * @param instances the instances * @param invert flag; if true only the class attribute remains, otherweise the class attribute is the only attribute that is deleted. * @throws Exception exception if instances cannot be splitted * @return Instances without the class attribute or instances with only the class attribute */ public static Instances divide(Instances instances, boolean invert) throws Exception{ Instances newInstances = new Instances(instances); if(instances.classIndex() < 0) throw new Exception("For class association rule mining a class attribute has to be specified."); if(invert){ for(int i=0;i