source: src/main/java/weka/classifiers/trees/j48/ClassifierSplitModel.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 7.3 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ClassifierSplitModel.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.trees.j48;
24
25import weka.core.Instance;
26import weka.core.Instances;
27import weka.core.RevisionHandler;
28import weka.core.Utils;
29
30import java.io.Serializable;
31
32/**
33 * Abstract class for classification models that can be used
34 * recursively to split the data.
35 *
36 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
37 * @version $Revision: 1.11 $
38 */
39public abstract class ClassifierSplitModel
40  implements Cloneable, Serializable, RevisionHandler {
41
42  /** for serialization */
43  private static final long serialVersionUID = 4280730118393457457L;
44
45  /** Distribution of class values. */ 
46  protected Distribution m_distribution; 
47
48  /** Number of created subsets. */
49  protected int m_numSubsets;         
50
51  /**
52   * Allows to clone a model (shallow copy).
53   */
54  public Object clone() {
55
56    Object clone = null;
57   
58    try {
59      clone = super.clone();
60    } catch (CloneNotSupportedException e) {
61    } 
62    return clone;
63  }
64
65  /**
66   * Builds the classifier split model for the given set of instances.
67   *
68   * @exception Exception if something goes wrong
69   */
70  public abstract void buildClassifier(Instances instances) throws Exception;
71 
72  /**
73   * Checks if generated model is valid.
74   */
75  public final boolean checkModel() {
76   
77    if (m_numSubsets > 0)
78      return true;
79    else
80      return false;
81  }
82 
83  /**
84   * Classifies a given instance.
85   *
86   * @exception Exception if something goes wrong
87   */
88  public final double classifyInstance(Instance instance)
89       throws Exception {
90   
91    int theSubset;
92   
93    theSubset = whichSubset(instance);
94    if (theSubset > -1)
95      return (double)m_distribution.maxClass(theSubset);
96    else
97      return (double)m_distribution.maxClass();
98  }
99
100  /**
101   * Gets class probability for instance.
102   *
103   * @exception Exception if something goes wrong
104   */
105  public double classProb(int classIndex, Instance instance, int theSubset) 
106       throws Exception {
107
108    if (theSubset > -1) {
109      return m_distribution.prob(classIndex,theSubset);
110    } else {
111      double [] weights = weights(instance);
112      if (weights == null) {
113        return m_distribution.prob(classIndex);
114      } else {
115        double prob = 0;
116        for (int i = 0; i < weights.length; i++) {
117          prob += weights[i] * m_distribution.prob(classIndex, i);
118        }
119        return prob;
120      }
121    }
122  }
123
124  /**
125   * Gets class probability for instance.
126   *
127   * @exception Exception if something goes wrong
128   */
129  public double classProbLaplace(int classIndex, Instance instance,
130                                 int theSubset) throws Exception {
131
132    if (theSubset > -1) {
133      return m_distribution.laplaceProb(classIndex, theSubset);
134    } else {
135      double [] weights = weights(instance);
136      if (weights == null) {
137        return m_distribution.laplaceProb(classIndex);
138      } else {
139        double prob = 0;
140        for (int i = 0; i < weights.length; i++) {
141          prob += weights[i] * m_distribution.laplaceProb(classIndex, i);
142        }
143        return prob;
144      }
145    }
146  }
147
148  /**
149   * Returns coding costs of model. Returns 0 if not overwritten.
150   */
151  public double codingCost() {
152
153    return 0;
154  }
155
156  /**
157   * Returns the distribution of class values induced by the model.
158   */
159  public final Distribution distribution() {
160
161    return m_distribution;
162  }
163
164  /**
165   * Prints left side of condition satisfied by instances.
166   *
167   * @param data the data.
168   */
169  public abstract String leftSide(Instances data);
170
171  /**
172   * Prints left side of condition satisfied by instances in subset index.
173   */
174  public abstract String rightSide(int index,Instances data);
175
176  /**
177   * Prints label for subset index of instances (eg class).
178   *
179   * @exception Exception if something goes wrong
180   */
181  public final String dumpLabel(int index,Instances data) throws Exception {
182
183    StringBuffer text;
184
185    text = new StringBuffer();
186    text.append(((Instances)data).classAttribute().
187                value(m_distribution.maxClass(index)));
188    text.append(" ("+Utils.roundDouble(m_distribution.perBag(index),2));
189    if (Utils.gr(m_distribution.numIncorrect(index),0))
190      text.append("/"+Utils.roundDouble(m_distribution.numIncorrect(index),2));
191    text.append(")");
192
193    return text.toString();
194  }
195 
196  public final String sourceClass(int index, Instances data) throws Exception {
197
198    System.err.println("sourceClass");
199    return (new StringBuffer(m_distribution.maxClass(index))).toString();
200  }
201
202  public abstract String sourceExpression(int index, Instances data);
203
204  /**
205   * Prints the split model.
206   *
207   * @exception Exception if something goes wrong
208   */
209  public final String dumpModel(Instances data) throws Exception {
210
211    StringBuffer text;
212    int i;
213
214    text = new StringBuffer();
215    for (i=0;i<m_numSubsets;i++) {
216      text.append(leftSide(data)+rightSide(i,data)+": ");
217      text.append(dumpLabel(i,data)+"\n");
218    }
219    return text.toString();
220  }
221 
222  /**
223   * Returns the number of created subsets for the split.
224   */
225  public final int numSubsets() {
226
227    return m_numSubsets;
228  }
229 
230  /**
231   * Sets distribution associated with model.
232   */
233  public void resetDistribution(Instances data) throws Exception {
234
235    m_distribution = new Distribution(data, this);
236  }
237
238  /**
239   * Splits the given set of instances into subsets.
240   *
241   * @exception Exception if something goes wrong
242   */
243  public final Instances [] split(Instances data) 
244       throws Exception { 
245
246    Instances [] instances = new Instances [m_numSubsets];
247    double [] weights;
248    double newWeight;
249    Instance instance;
250    int subset, i, j;
251
252    for (j=0;j<m_numSubsets;j++)
253      instances[j] = new Instances((Instances)data,
254                                            data.numInstances());
255    for (i = 0; i < data.numInstances(); i++) {
256      instance = ((Instances) data).instance(i);
257      weights = weights(instance);
258      subset = whichSubset(instance);
259      if (subset > -1)
260        instances[subset].add(instance);
261      else
262        for (j = 0; j < m_numSubsets; j++)
263          if (Utils.gr(weights[j],0)) {
264            newWeight = weights[j]*instance.weight();
265            instances[j].add(instance);
266            instances[j].lastInstance().setWeight(newWeight);
267          }
268    }
269    for (j = 0; j < m_numSubsets; j++)
270      instances[j].compactify();
271   
272    return instances;
273  }
274
275  /**
276   * Returns weights if instance is assigned to more than one subset.
277   * Returns null if instance is only assigned to one subset.
278   */
279  public abstract double [] weights(Instance instance);
280 
281  /**
282   * Returns index of subset instance is assigned to.
283   * Returns -1 if instance is assigned to more than one subset.
284   *
285   * @exception Exception if something goes wrong
286   */
287  public abstract int whichSubset(Instance instance) throws Exception;
288}
289
290
291
292
293
Note: See TracBrowser for help on using the repository browser.