/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16 | |
---|
/*
 *    ResidualModelSelection.java
 *    Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
 *
 */
22 | |
---|
23 | package weka.classifiers.trees.lmt; |
---|
24 | |
---|
25 | import weka.classifiers.trees.j48.ClassifierSplitModel; |
---|
26 | import weka.classifiers.trees.j48.Distribution; |
---|
27 | import weka.classifiers.trees.j48.ModelSelection; |
---|
28 | import weka.classifiers.trees.j48.NoSplit; |
---|
29 | import weka.core.Instances; |
---|
30 | import weka.core.RevisionUtils; |
---|
31 | |
---|
/**
 * Helper class for logistic model trees (weka.classifiers.trees.lmt.LMT) to implement the
 * splitting criterion based on residuals.
 *
 * @author Niels Landwehr
 * @version $Revision: 1.4 $
 */
39 | public class ResidualModelSelection |
---|
40 | extends ModelSelection { |
---|
41 | |
---|
42 | /** for serialization */ |
---|
43 | private static final long serialVersionUID = -293098783159385148L; |
---|
44 | |
---|
45 | /** Minimum number of instances for leaves*/ |
---|
46 | protected int m_minNumInstances; |
---|
47 | |
---|
48 | /** Minimum information gain for split*/ |
---|
49 | protected double m_minInfoGain; |
---|
50 | |
---|
51 | /** |
---|
52 | * Constructor to create ResidualModelSelection object. |
---|
53 | * @param minNumInstances minimum number of instances for leaves |
---|
54 | */ |
---|
55 | public ResidualModelSelection(int minNumInstances) { |
---|
56 | m_minNumInstances = minNumInstances; |
---|
57 | m_minInfoGain = 1.0E-4; |
---|
58 | } |
---|
59 | |
---|
60 | /**Method not in use*/ |
---|
61 | public void cleanup() { |
---|
62 | //method not in use |
---|
63 | } |
---|
64 | |
---|
65 | /** |
---|
66 | * Selects split based on residuals for the given dataset. |
---|
67 | */ |
---|
68 | public final ClassifierSplitModel selectModel(Instances data, |
---|
69 | double[][] dataZs, double[][] dataWs) throws Exception{ |
---|
70 | |
---|
71 | int numAttributes = data.numAttributes(); |
---|
72 | |
---|
73 | if (numAttributes < 2) throw new Exception("Can't select Model without non-class attribute"); |
---|
74 | if (data.numInstances() < m_minNumInstances) return new NoSplit(new Distribution(data)); |
---|
75 | |
---|
76 | |
---|
77 | double bestGain = -Double.MAX_VALUE; |
---|
78 | int bestAttribute = -1; |
---|
79 | |
---|
80 | //try split on every attribute |
---|
81 | for (int i = 0; i < numAttributes; i++) { |
---|
82 | if (i != data.classIndex()) { |
---|
83 | |
---|
84 | //build split |
---|
85 | ResidualSplit split = new ResidualSplit(i); |
---|
86 | split.buildClassifier(data, dataZs, dataWs); |
---|
87 | |
---|
88 | if (split.checkModel(m_minNumInstances)){ |
---|
89 | |
---|
90 | //evaluate split |
---|
91 | double gain = split.entropyGain(); |
---|
92 | if (gain > bestGain) { |
---|
93 | bestGain = gain; |
---|
94 | bestAttribute = i; |
---|
95 | } |
---|
96 | } |
---|
97 | } |
---|
98 | } |
---|
99 | |
---|
100 | if (bestGain >= m_minInfoGain){ |
---|
101 | //return best split |
---|
102 | ResidualSplit split = new ResidualSplit(bestAttribute); |
---|
103 | split.buildClassifier(data, dataZs, dataWs); |
---|
104 | return split; |
---|
105 | } else { |
---|
106 | //could not find any split with enough information gain |
---|
107 | return new NoSplit(new Distribution(data)); |
---|
108 | } |
---|
109 | } |
---|
110 | |
---|
111 | /**Method not in use*/ |
---|
112 | public final ClassifierSplitModel selectModel(Instances train) { |
---|
113 | //method not in use |
---|
114 | return null; |
---|
115 | } |
---|
116 | |
---|
117 | /**Method not in use*/ |
---|
118 | public final ClassifierSplitModel selectModel(Instances train, Instances test) { |
---|
119 | //method not in use |
---|
120 | return null; |
---|
121 | } |
---|
122 | |
---|
123 | /** |
---|
124 | * Returns the revision string. |
---|
125 | * |
---|
126 | * @return the revision |
---|
127 | */ |
---|
128 | public String getRevision() { |
---|
129 | return RevisionUtils.extract("$Revision: 1.4 $"); |
---|
130 | } |
---|
131 | } |
---|