| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * ClassifierSplitModel.java |
|---|
| 19 | * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand |
|---|
| 20 | * |
|---|
| 21 | */ |
|---|
| 22 | |
|---|
| 23 | package weka.classifiers.trees.j48; |
|---|
| 24 | |
|---|
| 25 | import weka.core.Instance; |
|---|
| 26 | import weka.core.Instances; |
|---|
| 27 | import weka.core.RevisionHandler; |
|---|
| 28 | import weka.core.Utils; |
|---|
| 29 | |
|---|
| 30 | import java.io.Serializable; |
|---|
| 31 | |
|---|
| 32 | /** |
|---|
| 33 | * Abstract class for classification models that can be used |
|---|
| 34 | * recursively to split the data. |
|---|
| 35 | * |
|---|
| 36 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) |
|---|
| 37 | * @version $Revision: 1.11 $ |
|---|
| 38 | */ |
|---|
| 39 | public abstract class ClassifierSplitModel |
|---|
| 40 | implements Cloneable, Serializable, RevisionHandler { |
|---|
| 41 | |
|---|
| 42 | /** for serialization */ |
|---|
| 43 | private static final long serialVersionUID = 4280730118393457457L; |
|---|
| 44 | |
|---|
| 45 | /** Distribution of class values. */ |
|---|
| 46 | protected Distribution m_distribution; |
|---|
| 47 | |
|---|
| 48 | /** Number of created subsets. */ |
|---|
| 49 | protected int m_numSubsets; |
|---|
| 50 | |
|---|
| 51 | /** |
|---|
| 52 | * Allows to clone a model (shallow copy). |
|---|
| 53 | */ |
|---|
| 54 | public Object clone() { |
|---|
| 55 | |
|---|
| 56 | Object clone = null; |
|---|
| 57 | |
|---|
| 58 | try { |
|---|
| 59 | clone = super.clone(); |
|---|
| 60 | } catch (CloneNotSupportedException e) { |
|---|
| 61 | } |
|---|
| 62 | return clone; |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | /** |
|---|
| 66 | * Builds the classifier split model for the given set of instances. |
|---|
| 67 | * |
|---|
| 68 | * @exception Exception if something goes wrong |
|---|
| 69 | */ |
|---|
| 70 | public abstract void buildClassifier(Instances instances) throws Exception; |
|---|
| 71 | |
|---|
| 72 | /** |
|---|
| 73 | * Checks if generated model is valid. |
|---|
| 74 | */ |
|---|
| 75 | public final boolean checkModel() { |
|---|
| 76 | |
|---|
| 77 | if (m_numSubsets > 0) |
|---|
| 78 | return true; |
|---|
| 79 | else |
|---|
| 80 | return false; |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | /** |
|---|
| 84 | * Classifies a given instance. |
|---|
| 85 | * |
|---|
| 86 | * @exception Exception if something goes wrong |
|---|
| 87 | */ |
|---|
| 88 | public final double classifyInstance(Instance instance) |
|---|
| 89 | throws Exception { |
|---|
| 90 | |
|---|
| 91 | int theSubset; |
|---|
| 92 | |
|---|
| 93 | theSubset = whichSubset(instance); |
|---|
| 94 | if (theSubset > -1) |
|---|
| 95 | return (double)m_distribution.maxClass(theSubset); |
|---|
| 96 | else |
|---|
| 97 | return (double)m_distribution.maxClass(); |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | /** |
|---|
| 101 | * Gets class probability for instance. |
|---|
| 102 | * |
|---|
| 103 | * @exception Exception if something goes wrong |
|---|
| 104 | */ |
|---|
| 105 | public double classProb(int classIndex, Instance instance, int theSubset) |
|---|
| 106 | throws Exception { |
|---|
| 107 | |
|---|
| 108 | if (theSubset > -1) { |
|---|
| 109 | return m_distribution.prob(classIndex,theSubset); |
|---|
| 110 | } else { |
|---|
| 111 | double [] weights = weights(instance); |
|---|
| 112 | if (weights == null) { |
|---|
| 113 | return m_distribution.prob(classIndex); |
|---|
| 114 | } else { |
|---|
| 115 | double prob = 0; |
|---|
| 116 | for (int i = 0; i < weights.length; i++) { |
|---|
| 117 | prob += weights[i] * m_distribution.prob(classIndex, i); |
|---|
| 118 | } |
|---|
| 119 | return prob; |
|---|
| 120 | } |
|---|
| 121 | } |
|---|
| 122 | } |
|---|
| 123 | |
|---|
| 124 | /** |
|---|
| 125 | * Gets class probability for instance. |
|---|
| 126 | * |
|---|
| 127 | * @exception Exception if something goes wrong |
|---|
| 128 | */ |
|---|
| 129 | public double classProbLaplace(int classIndex, Instance instance, |
|---|
| 130 | int theSubset) throws Exception { |
|---|
| 131 | |
|---|
| 132 | if (theSubset > -1) { |
|---|
| 133 | return m_distribution.laplaceProb(classIndex, theSubset); |
|---|
| 134 | } else { |
|---|
| 135 | double [] weights = weights(instance); |
|---|
| 136 | if (weights == null) { |
|---|
| 137 | return m_distribution.laplaceProb(classIndex); |
|---|
| 138 | } else { |
|---|
| 139 | double prob = 0; |
|---|
| 140 | for (int i = 0; i < weights.length; i++) { |
|---|
| 141 | prob += weights[i] * m_distribution.laplaceProb(classIndex, i); |
|---|
| 142 | } |
|---|
| 143 | return prob; |
|---|
| 144 | } |
|---|
| 145 | } |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | /** |
|---|
| 149 | * Returns coding costs of model. Returns 0 if not overwritten. |
|---|
| 150 | */ |
|---|
| 151 | public double codingCost() { |
|---|
| 152 | |
|---|
| 153 | return 0; |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | /** |
|---|
| 157 | * Returns the distribution of class values induced by the model. |
|---|
| 158 | */ |
|---|
| 159 | public final Distribution distribution() { |
|---|
| 160 | |
|---|
| 161 | return m_distribution; |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | /** |
|---|
| 165 | * Prints left side of condition satisfied by instances. |
|---|
| 166 | * |
|---|
| 167 | * @param data the data. |
|---|
| 168 | */ |
|---|
| 169 | public abstract String leftSide(Instances data); |
|---|
| 170 | |
|---|
| 171 | /** |
|---|
| 172 | * Prints left side of condition satisfied by instances in subset index. |
|---|
| 173 | */ |
|---|
| 174 | public abstract String rightSide(int index,Instances data); |
|---|
| 175 | |
|---|
| 176 | /** |
|---|
| 177 | * Prints label for subset index of instances (eg class). |
|---|
| 178 | * |
|---|
| 179 | * @exception Exception if something goes wrong |
|---|
| 180 | */ |
|---|
| 181 | public final String dumpLabel(int index,Instances data) throws Exception { |
|---|
| 182 | |
|---|
| 183 | StringBuffer text; |
|---|
| 184 | |
|---|
| 185 | text = new StringBuffer(); |
|---|
| 186 | text.append(((Instances)data).classAttribute(). |
|---|
| 187 | value(m_distribution.maxClass(index))); |
|---|
| 188 | text.append(" ("+Utils.roundDouble(m_distribution.perBag(index),2)); |
|---|
| 189 | if (Utils.gr(m_distribution.numIncorrect(index),0)) |
|---|
| 190 | text.append("/"+Utils.roundDouble(m_distribution.numIncorrect(index),2)); |
|---|
| 191 | text.append(")"); |
|---|
| 192 | |
|---|
| 193 | return text.toString(); |
|---|
| 194 | } |
|---|
| 195 | |
|---|
| 196 | public final String sourceClass(int index, Instances data) throws Exception { |
|---|
| 197 | |
|---|
| 198 | System.err.println("sourceClass"); |
|---|
| 199 | return (new StringBuffer(m_distribution.maxClass(index))).toString(); |
|---|
| 200 | } |
|---|
| 201 | |
|---|
| 202 | public abstract String sourceExpression(int index, Instances data); |
|---|
| 203 | |
|---|
| 204 | /** |
|---|
| 205 | * Prints the split model. |
|---|
| 206 | * |
|---|
| 207 | * @exception Exception if something goes wrong |
|---|
| 208 | */ |
|---|
| 209 | public final String dumpModel(Instances data) throws Exception { |
|---|
| 210 | |
|---|
| 211 | StringBuffer text; |
|---|
| 212 | int i; |
|---|
| 213 | |
|---|
| 214 | text = new StringBuffer(); |
|---|
| 215 | for (i=0;i<m_numSubsets;i++) { |
|---|
| 216 | text.append(leftSide(data)+rightSide(i,data)+": "); |
|---|
| 217 | text.append(dumpLabel(i,data)+"\n"); |
|---|
| 218 | } |
|---|
| 219 | return text.toString(); |
|---|
| 220 | } |
|---|
| 221 | |
|---|
| 222 | /** |
|---|
| 223 | * Returns the number of created subsets for the split. |
|---|
| 224 | */ |
|---|
| 225 | public final int numSubsets() { |
|---|
| 226 | |
|---|
| 227 | return m_numSubsets; |
|---|
| 228 | } |
|---|
| 229 | |
|---|
| 230 | /** |
|---|
| 231 | * Sets distribution associated with model. |
|---|
| 232 | */ |
|---|
| 233 | public void resetDistribution(Instances data) throws Exception { |
|---|
| 234 | |
|---|
| 235 | m_distribution = new Distribution(data, this); |
|---|
| 236 | } |
|---|
| 237 | |
|---|
| 238 | /** |
|---|
| 239 | * Splits the given set of instances into subsets. |
|---|
| 240 | * |
|---|
| 241 | * @exception Exception if something goes wrong |
|---|
| 242 | */ |
|---|
| 243 | public final Instances [] split(Instances data) |
|---|
| 244 | throws Exception { |
|---|
| 245 | |
|---|
| 246 | Instances [] instances = new Instances [m_numSubsets]; |
|---|
| 247 | double [] weights; |
|---|
| 248 | double newWeight; |
|---|
| 249 | Instance instance; |
|---|
| 250 | int subset, i, j; |
|---|
| 251 | |
|---|
| 252 | for (j=0;j<m_numSubsets;j++) |
|---|
| 253 | instances[j] = new Instances((Instances)data, |
|---|
| 254 | data.numInstances()); |
|---|
| 255 | for (i = 0; i < data.numInstances(); i++) { |
|---|
| 256 | instance = ((Instances) data).instance(i); |
|---|
| 257 | weights = weights(instance); |
|---|
| 258 | subset = whichSubset(instance); |
|---|
| 259 | if (subset > -1) |
|---|
| 260 | instances[subset].add(instance); |
|---|
| 261 | else |
|---|
| 262 | for (j = 0; j < m_numSubsets; j++) |
|---|
| 263 | if (Utils.gr(weights[j],0)) { |
|---|
| 264 | newWeight = weights[j]*instance.weight(); |
|---|
| 265 | instances[j].add(instance); |
|---|
| 266 | instances[j].lastInstance().setWeight(newWeight); |
|---|
| 267 | } |
|---|
| 268 | } |
|---|
| 269 | for (j = 0; j < m_numSubsets; j++) |
|---|
| 270 | instances[j].compactify(); |
|---|
| 271 | |
|---|
| 272 | return instances; |
|---|
| 273 | } |
|---|
| 274 | |
|---|
| 275 | /** |
|---|
| 276 | * Returns weights if instance is assigned to more than one subset. |
|---|
| 277 | * Returns null if instance is only assigned to one subset. |
|---|
| 278 | */ |
|---|
| 279 | public abstract double [] weights(Instance instance); |
|---|
| 280 | |
|---|
| 281 | /** |
|---|
| 282 | * Returns index of subset instance is assigned to. |
|---|
| 283 | * Returns -1 if instance is assigned to more than one subset. |
|---|
| 284 | * |
|---|
| 285 | * @exception Exception if something goes wrong |
|---|
| 286 | */ |
|---|
| 287 | public abstract int whichSubset(Instance instance) throws Exception; |
|---|
| 288 | } |
|---|
| 289 | |
|---|
| 290 | |
|---|
| 291 | |
|---|
| 292 | |
|---|
| 293 | |
|---|