source: src/main/java/weka/classifiers/meta/ClassificationViaRegression.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 8.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ClassificationViaRegression.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.meta;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.classifiers.SingleClassifierEnhancer;
28import weka.core.Capabilities;
29import weka.core.Instance;
30import weka.core.Instances;
31import weka.core.RevisionUtils;
32import weka.core.TechnicalInformation;
33import weka.core.TechnicalInformationHandler;
34import weka.core.Utils;
35import weka.core.Capabilities.Capability;
36import weka.core.TechnicalInformation.Field;
37import weka.core.TechnicalInformation.Type;
38import weka.filters.Filter;
39import weka.filters.unsupervised.attribute.MakeIndicator;
40
41/**
42 <!-- globalinfo-start -->
43 * Class for doing classification using regression methods. Class is binarized and one regression model is built for each class value. For more information, see, for example<br/>
44 * <br/>
45 * E. Frank, Y. Wang, S. Inglis, G. Holmes, I.H. Witten (1998). Using model trees for classification. Machine Learning. 32(1):63-76.
46 * <p/>
47 <!-- globalinfo-end -->
48 *
49 <!-- technical-bibtex-start -->
50 * BibTeX:
51 * <pre>
52 * &#64;article{Frank1998,
53 *    author = {E. Frank and Y. Wang and S. Inglis and G. Holmes and I.H. Witten},
54 *    journal = {Machine Learning},
55 *    number = {1},
56 *    pages = {63-76},
57 *    title = {Using model trees for classification},
58 *    volume = {32},
59 *    year = {1998}
60 * }
61 * </pre>
62 * <p/>
63 <!-- technical-bibtex-end -->
64 *
65 <!-- options-start -->
66 * Valid options are: <p/>
67 *
68 * <pre> -D
69 *  If set, classifier is run in debug mode and
70 *  may output additional info to the console</pre>
71 *
72 * <pre> -W
73 *  Full name of base classifier.
74 *  (default: weka.classifiers.trees.M5P)</pre>
75 *
76 * <pre>
77 * Options specific to classifier weka.classifiers.trees.M5P:
78 * </pre>
79 *
80 * <pre> -N
81 *  Use unpruned tree/rules</pre>
82 *
83 * <pre> -U
84 *  Use unsmoothed predictions</pre>
85 *
86 * <pre> -R
87 *  Build regression tree/rule rather than a model tree/rule</pre>
88 *
89 * <pre> -M &lt;minimum number of instances&gt;
90 *  Set minimum number of instances per leaf
91 *  (default 4)</pre>
92 *
93 * <pre> -L
94 *  Save instances at the nodes in
95 *  the tree (for visualization purposes)</pre>
96 *
97 <!-- options-end -->
98 *
99 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
100 * @author Len Trigg (trigg@cs.waikato.ac.nz)
101 * @version $Revision: 5928 $
102*/
103public class ClassificationViaRegression 
104  extends SingleClassifierEnhancer
105  implements TechnicalInformationHandler {
106
107  /** for serialization */
108  static final long serialVersionUID = 4500023123618669859L;
109 
110  /** The classifiers. (One for each class.) */
111  private Classifier[] m_Classifiers;
112
113  /** The filters used to transform the class. */
114  private MakeIndicator[] m_ClassFilters;
115
116  /**
117   * Default constructor.
118   */
119  public ClassificationViaRegression() {
120   
121    m_Classifier = new weka.classifiers.trees.M5P();
122  }
123   
124  /**
125   * Returns a string describing classifier
126   * @return a description suitable for
127   * displaying in the explorer/experimenter gui
128   */
129  public String globalInfo() {
130 
131    return "Class for doing classification using regression methods. Class is "
132      + "binarized and one regression model is built for each class value. For more "
133      + "information, see, for example\n\n"
134      + getTechnicalInformation().toString();
135  }
136
137  /**
138   * Returns an instance of a TechnicalInformation object, containing
139   * detailed information about the technical background of this class,
140   * e.g., paper reference or book this class is based on.
141   *
142   * @return the technical information about this class
143   */
144  public TechnicalInformation getTechnicalInformation() {
145    TechnicalInformation        result;
146   
147    result = new TechnicalInformation(Type.ARTICLE);
148    result.setValue(Field.AUTHOR, "E. Frank and Y. Wang and S. Inglis and G. Holmes and I.H. Witten");
149    result.setValue(Field.YEAR, "1998");
150    result.setValue(Field.TITLE, "Using model trees for classification");
151    result.setValue(Field.JOURNAL, "Machine Learning");
152    result.setValue(Field.VOLUME, "32");
153    result.setValue(Field.NUMBER, "1");
154    result.setValue(Field.PAGES, "63-76");
155   
156    return result;
157  }
158
159  /**
160   * String describing default classifier.
161   *
162   * @return the default classifier classname
163   */
164  protected String defaultClassifierString() {
165   
166    return "weka.classifiers.trees.M5P";
167  }
168
169  /**
170   * Returns default capabilities of the classifier.
171   *
172   * @return      the capabilities of this classifier
173   */
174  public Capabilities getCapabilities() {
175    Capabilities result = super.getCapabilities();
176
177    // class
178    result.disableAllClasses();
179    result.disableAllClassDependencies();
180    result.enable(Capability.NOMINAL_CLASS);
181   
182    return result;
183  }
184
185  /**
186   * Builds the classifiers.
187   *
188   * @param insts the training data.
189   * @throws Exception if a classifier can't be built
190   */
191  public void buildClassifier(Instances insts) throws Exception {
192
193    Instances newInsts;
194
195    // can classifier handle the data?
196    getCapabilities().testWithFail(insts);
197
198    // remove instances with missing class
199    insts = new Instances(insts);
200    insts.deleteWithMissingClass();
201   
202    m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, insts.numClasses());
203    m_ClassFilters = new MakeIndicator[insts.numClasses()];
204    for (int i = 0; i < insts.numClasses(); i++) {
205      m_ClassFilters[i] = new MakeIndicator();
206      m_ClassFilters[i].setAttributeIndex("" + (insts.classIndex() + 1));
207      m_ClassFilters[i].setValueIndex(i);
208      m_ClassFilters[i].setNumeric(true);
209      m_ClassFilters[i].setInputFormat(insts);
210      newInsts = Filter.useFilter(insts, m_ClassFilters[i]);
211      m_Classifiers[i].buildClassifier(newInsts);
212    }
213  }
214
215  /**
216   * Returns the distribution for an instance.
217   *
218   * @param inst the instance to get the distribution for
219   * @return the computed distribution
220   * @throws Exception if the distribution can't be computed successfully
221   */
222  public double[] distributionForInstance(Instance inst) throws Exception {
223   
224    double[] probs = new double[inst.numClasses()];
225    Instance newInst;
226    double sum = 0;
227
228    for (int i = 0; i < inst.numClasses(); i++) {
229      m_ClassFilters[i].input(inst);
230      m_ClassFilters[i].batchFinished();
231      newInst = m_ClassFilters[i].output();
232      probs[i] = m_Classifiers[i].classifyInstance(newInst);
233      if (probs[i] > 1) {
234        probs[i] = 1;
235      }
236      if (probs[i] < 0){
237        probs[i] = 0;
238      }
239      sum += probs[i];
240    }
241    if (sum != 0) {
242      Utils.normalize(probs, sum);
243    } 
244    return probs;
245  }
246
247  /**
248   * Prints the classifiers.
249   *
250   * @return a string representation of the classifier
251   */
252  public String toString() {
253
254    if (m_Classifiers == null) {
255      return "Classification via Regression: No model built yet.";
256    }
257    StringBuffer text = new StringBuffer();
258    text.append("Classification via Regression\n\n");
259    for (int i = 0; i < m_Classifiers.length; i++) {
260      text.append("Classifier for class with index " + i + ":\n\n");
261      text.append(m_Classifiers[i].toString() + "\n\n");
262    }
263    return text.toString();
264  }
265 
266  /**
267   * Returns the revision string.
268   *
269   * @return            the revision
270   */
271  public String getRevision() {
272    return RevisionUtils.extract("$Revision: 5928 $");
273  }
274
275  /**
276   * Main method for testing this class.
277   *
278   * @param argv the options for the learner
279   */
280  public static void main(String [] argv){
281    runClassifier(new ClassificationViaRegression(), argv);
282  }
283}
Note: See TracBrowser for help on using the repository browser.