| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * ResidualModelSelection.java |
|---|
| 19 | * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand |
|---|
| 20 | * |
|---|
| 21 | */ |
|---|
| 22 | |
|---|
| 23 | package weka.classifiers.trees.lmt; |
|---|
| 24 | |
|---|
| 25 | import weka.classifiers.trees.j48.ClassifierSplitModel; |
|---|
| 26 | import weka.classifiers.trees.j48.Distribution; |
|---|
| 27 | import weka.classifiers.trees.j48.ModelSelection; |
|---|
| 28 | import weka.classifiers.trees.j48.NoSplit; |
|---|
| 29 | import weka.core.Instances; |
|---|
| 30 | import weka.core.RevisionUtils; |
|---|
| 31 | |
|---|
| 32 | /** |
|---|
| 33 | * Helper class for logistic model trees (weka.classifiers.trees.lmt.LMT) to implement the |
|---|
| 34 | * splitting criterion based on residuals. |
|---|
| 35 | * |
|---|
| 36 | * @author Niels Landwehr |
|---|
| 37 | * @version $Revision: 1.4 $ |
|---|
| 38 | */ |
|---|
| 39 | public class ResidualModelSelection |
|---|
| 40 | extends ModelSelection { |
|---|
| 41 | |
|---|
| 42 | /** for serialization */ |
|---|
| 43 | private static final long serialVersionUID = -293098783159385148L; |
|---|
| 44 | |
|---|
| 45 | /** Minimum number of instances for leaves*/ |
|---|
| 46 | protected int m_minNumInstances; |
|---|
| 47 | |
|---|
| 48 | /** Minimum information gain for split*/ |
|---|
| 49 | protected double m_minInfoGain; |
|---|
| 50 | |
|---|
| 51 | /** |
|---|
| 52 | * Constructor to create ResidualModelSelection object. |
|---|
| 53 | * @param minNumInstances minimum number of instances for leaves |
|---|
| 54 | */ |
|---|
| 55 | public ResidualModelSelection(int minNumInstances) { |
|---|
| 56 | m_minNumInstances = minNumInstances; |
|---|
| 57 | m_minInfoGain = 1.0E-4; |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | /**Method not in use*/ |
|---|
| 61 | public void cleanup() { |
|---|
| 62 | //method not in use |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | /** |
|---|
| 66 | * Selects split based on residuals for the given dataset. |
|---|
| 67 | */ |
|---|
| 68 | public final ClassifierSplitModel selectModel(Instances data, |
|---|
| 69 | double[][] dataZs, double[][] dataWs) throws Exception{ |
|---|
| 70 | |
|---|
| 71 | int numAttributes = data.numAttributes(); |
|---|
| 72 | |
|---|
| 73 | if (numAttributes < 2) throw new Exception("Can't select Model without non-class attribute"); |
|---|
| 74 | if (data.numInstances() < m_minNumInstances) return new NoSplit(new Distribution(data)); |
|---|
| 75 | |
|---|
| 76 | |
|---|
| 77 | double bestGain = -Double.MAX_VALUE; |
|---|
| 78 | int bestAttribute = -1; |
|---|
| 79 | |
|---|
| 80 | //try split on every attribute |
|---|
| 81 | for (int i = 0; i < numAttributes; i++) { |
|---|
| 82 | if (i != data.classIndex()) { |
|---|
| 83 | |
|---|
| 84 | //build split |
|---|
| 85 | ResidualSplit split = new ResidualSplit(i); |
|---|
| 86 | split.buildClassifier(data, dataZs, dataWs); |
|---|
| 87 | |
|---|
| 88 | if (split.checkModel(m_minNumInstances)){ |
|---|
| 89 | |
|---|
| 90 | //evaluate split |
|---|
| 91 | double gain = split.entropyGain(); |
|---|
| 92 | if (gain > bestGain) { |
|---|
| 93 | bestGain = gain; |
|---|
| 94 | bestAttribute = i; |
|---|
| 95 | } |
|---|
| 96 | } |
|---|
| 97 | } |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | if (bestGain >= m_minInfoGain){ |
|---|
| 101 | //return best split |
|---|
| 102 | ResidualSplit split = new ResidualSplit(bestAttribute); |
|---|
| 103 | split.buildClassifier(data, dataZs, dataWs); |
|---|
| 104 | return split; |
|---|
| 105 | } else { |
|---|
| 106 | //could not find any split with enough information gain |
|---|
| 107 | return new NoSplit(new Distribution(data)); |
|---|
| 108 | } |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | /**Method not in use*/ |
|---|
| 112 | public final ClassifierSplitModel selectModel(Instances train) { |
|---|
| 113 | //method not in use |
|---|
| 114 | return null; |
|---|
| 115 | } |
|---|
| 116 | |
|---|
| 117 | /**Method not in use*/ |
|---|
| 118 | public final ClassifierSplitModel selectModel(Instances train, Instances test) { |
|---|
| 119 | //method not in use |
|---|
| 120 | return null; |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | /** |
|---|
| 124 | * Returns the revision string. |
|---|
| 125 | * |
|---|
| 126 | * @return the revision |
|---|
| 127 | */ |
|---|
| 128 | public String getRevision() { |
|---|
| 129 | return RevisionUtils.extract("$Revision: 1.4 $"); |
|---|
| 130 | } |
|---|
| 131 | } |
|---|