source: branches/MetisMQI/src/main/java/weka/classifiers/rules/part/MakeDecList.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 8.1 KB
Line 
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16
/*
 *    MakeDecList.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */
22
23package weka.classifiers.rules.part;
24
25import weka.classifiers.trees.j48.ModelSelection;
26import weka.core.Capabilities;
27import weka.core.CapabilitiesHandler;
28import weka.core.Instance;
29import weka.core.Instances;
30import weka.core.RevisionHandler;
31import weka.core.RevisionUtils;
32import weka.core.Utils;
33import weka.core.Capabilities.Capability;
34
35import java.io.Serializable;
36import java.util.Enumeration;
37import java.util.Random;
38import java.util.Vector;
39
40/**
41 * Class for handling a decision list.
42 *
43 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
44 * @version $Revision: 5483 $
45 */
46public class MakeDecList
47  implements Serializable, CapabilitiesHandler, RevisionHandler {
48
49  /** for serialization */
50  private static final long serialVersionUID = -1427481323245079123L;
51
52  /** Vector storing the rules. */
53  private Vector theRules;
54
55  /** The confidence for C45-type pruning. */
56  private double CF = 0.25f;
57
58  /** Minimum number of objects */
59  private int minNumObj;
60
61  /** The model selection method. */
62  private ModelSelection toSelectModeL;
63
64  /** How many subsets of equal size? One used for pruning, the rest for training. */
65  private int numSetS = 3;
66
67  /** Use reduced error pruning? */
68  private boolean reducedErrorPruning = false;
69
70  /** Generated unpruned list? */
71  private boolean unpruned = false;
72
73  /** The seed for random number generation. */
74  private int m_seed = 1;
75
76  /**
77   * Constructor for unpruned dec list.
78   */
79  public MakeDecList(ModelSelection toSelectLocModel,
80                     int minNum){
81
82    toSelectModeL = toSelectLocModel;
83    reducedErrorPruning = false;
84    unpruned = true;
85    minNumObj = minNum;
86  }
87
88  /**
89   * Constructor for dec list pruned using C4.5 pruning.
90   */
91  public MakeDecList(ModelSelection toSelectLocModel, double cf,
92                     int minNum){
93
94    toSelectModeL = toSelectLocModel;
95    CF = cf;
96    reducedErrorPruning = false;
97    unpruned = false;
98    minNumObj = minNum;
99  }
100
101  /**
102   * Constructor for dec list pruned using hold-out pruning.
103   */
104  public MakeDecList(ModelSelection toSelectLocModel, int num,
105                     int minNum, int seed){
106
107    toSelectModeL = toSelectLocModel;
108    numSetS = num;
109    reducedErrorPruning = true;
110    unpruned = false;
111    minNumObj = minNum;
112    m_seed = seed;
113  }
114
115  /**
116   * Returns default capabilities of the classifier.
117   *
118   * @return      the capabilities of this classifier
119   */
120  public Capabilities getCapabilities() {
121    Capabilities result = new Capabilities(this);
122    result.disableAll();
123
124    // attributes
125    result.enable(Capability.NOMINAL_ATTRIBUTES);
126    result.enable(Capability.NUMERIC_ATTRIBUTES);
127    result.enable(Capability.DATE_ATTRIBUTES);
128    result.enable(Capability.MISSING_VALUES);
129
130    // class
131    result.enable(Capability.NOMINAL_CLASS);
132    result.enable(Capability.MISSING_CLASS_VALUES);
133   
134    return result;
135  }
136
137  /**
138   * Builds dec list.
139   *
140   * @exception Exception if dec list can't be built successfully
141   */
142  public void buildClassifier(Instances data) throws Exception {
143   
144    // can classifier handle the data?
145    getCapabilities().testWithFail(data);
146
147    // remove instances with missing class
148    data = new Instances(data);
149    data.deleteWithMissingClass();
150   
151    ClassifierDecList currentRule;
152    double currentWeight;
153    Instances oldGrowData, newGrowData, oldPruneData,
154      newPruneData;
155    int numRules = 0;
156   
157    theRules = new Vector();
158    if ((reducedErrorPruning) && !(unpruned)){ 
159      Random random = new Random(m_seed);
160      data.randomize(random);
161      data.stratify(numSetS);
162      oldGrowData = data.trainCV(numSetS, numSetS - 1, random);
163      oldPruneData = data.testCV(numSetS, numSetS - 1);
164    } else {
165      oldGrowData = data;
166      oldPruneData = null;
167    }
168
169    while (Utils.gr(oldGrowData.numInstances(),0)){
170
171      // Create rule
172      if (unpruned) {
173        currentRule = new ClassifierDecList(toSelectModeL,
174                                            minNumObj);
175        ((ClassifierDecList)currentRule).buildRule(oldGrowData);
176      } else if (reducedErrorPruning) {
177        currentRule = new PruneableDecList(toSelectModeL,
178                                           minNumObj);
179        ((PruneableDecList)currentRule).buildRule(oldGrowData, 
180                                                  oldPruneData);
181      } else {
182        currentRule = new C45PruneableDecList(toSelectModeL, CF,
183                                              minNumObj);
184        ((C45PruneableDecList)currentRule).buildRule(oldGrowData);
185      }
186      numRules++;
187
188      // Remove instances from growing data
189      newGrowData = new Instances(oldGrowData,
190                                  oldGrowData.numInstances());
191      Enumeration enu = oldGrowData.enumerateInstances();
192      while (enu.hasMoreElements()) {
193        Instance instance = (Instance) enu.nextElement();
194        currentWeight = currentRule.weight(instance);
195        if (Utils.sm(currentWeight,1)) {
196          instance.setWeight(instance.weight()*(1-currentWeight));
197          newGrowData.add(instance);
198        }
199      }
200      newGrowData.compactify();
201      oldGrowData = newGrowData;
202     
203      // Remove instances from pruning data
204      if ((reducedErrorPruning) && !(unpruned)) {
205        newPruneData = new Instances(oldPruneData,
206                                             oldPruneData.numInstances());
207        enu = oldPruneData.enumerateInstances();
208        while (enu.hasMoreElements()) {
209          Instance instance = (Instance) enu.nextElement();
210          currentWeight = currentRule.weight(instance);
211          if (Utils.sm(currentWeight,1)) {
212            instance.setWeight(instance.weight()*(1-currentWeight));
213            newPruneData.add(instance);
214          }
215        }
216        newPruneData.compactify();
217        oldPruneData = newPruneData;
218      }
219      theRules.addElement(currentRule);
220    }
221  }
222
223  /**
224   * Outputs the classifier into a string.
225   */
226  public String toString(){
227
228    StringBuffer text = new StringBuffer();
229
230    for (int i=0;i<theRules.size();i++)
231      text.append((ClassifierDecList)theRules.elementAt(i)+"\n");
232    text.append("Number of Rules  : \t"+theRules.size()+"\n");
233
234    return text.toString();
235  }
236
237  /**
238   * Classifies an instance.
239   *
240   * @exception Exception if instance can't be classified
241   */
242  public double classifyInstance(Instance instance) 
243       throws Exception {
244
245    double maxProb = -1;
246    double [] sumProbs;
247    int maxIndex = 0;
248
249    sumProbs = distributionForInstance(instance);
250    for (int j = 0; j < sumProbs.length; j++) {
251      if (Utils.gr(sumProbs[j],maxProb)){
252        maxIndex = j;
253        maxProb = sumProbs[j];
254      }
255    }
256
257    return (double)maxIndex;
258  }
259
260  /**
261   * Returns the class distribution for an instance.
262   *
263   * @exception Exception if distribution can't be computed
264   */
265  public double[] distributionForInstance(Instance instance) 
266       throws Exception {
267
268    double [] currentProbs = null;
269    double [] sumProbs;
270    double currentWeight, weight = 1;
271    int i,j;
272       
273    // Get probabilities.
274    sumProbs = new double [instance.numClasses()];
275    i = 0;
276    while (Utils.gr(weight,0)){
277      currentWeight = 
278        ((ClassifierDecList)theRules.elementAt(i)).weight(instance);
279      if (Utils.gr(currentWeight,0)) {
280        currentProbs = ((ClassifierDecList)theRules.elementAt(i)).
281          distributionForInstance(instance);
282        for (j = 0; j < sumProbs.length; j++)
283          sumProbs[j] += weight*currentProbs[j];
284        weight = weight*(1-currentWeight);
285      }
286      i++;
287    }
288
289    return sumProbs;
290  }
291
292  /**
293   * Outputs the number of rules in the classifier.
294   */
295  public int numRules(){
296
297    return theRules.size();
298  }
299 
300  /**
301   * Returns the revision string.
302   *
303   * @return            the revision
304   */
305  public String getRevision() {
306    return RevisionUtils.extract("$Revision: 5483 $");
307  }
308}
Note: See TracBrowser for help on using the repository browser.