1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * LMT.java |
---|
19 | * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.classifiers.trees; |
---|
24 | |
---|
25 | import weka.classifiers.Classifier; |
---|
26 | import weka.classifiers.AbstractClassifier; |
---|
27 | import weka.classifiers.trees.j48.C45ModelSelection; |
---|
28 | import weka.classifiers.trees.j48.ModelSelection; |
---|
29 | import weka.classifiers.trees.lmt.LMTNode; |
---|
30 | import weka.classifiers.trees.lmt.ResidualModelSelection; |
---|
31 | import weka.core.AdditionalMeasureProducer; |
---|
32 | import weka.core.Capabilities; |
---|
33 | import weka.core.Drawable; |
---|
34 | import weka.core.Instance; |
---|
35 | import weka.core.Instances; |
---|
36 | import weka.core.Option; |
---|
37 | import weka.core.OptionHandler; |
---|
38 | import weka.core.RevisionUtils; |
---|
39 | import weka.core.TechnicalInformation; |
---|
40 | import weka.core.TechnicalInformationHandler; |
---|
41 | import weka.core.Utils; |
---|
42 | import weka.core.Capabilities.Capability; |
---|
43 | import weka.core.TechnicalInformation.Field; |
---|
44 | import weka.core.TechnicalInformation.Type; |
---|
45 | import weka.filters.Filter; |
---|
46 | import weka.filters.supervised.attribute.NominalToBinary; |
---|
47 | import weka.filters.unsupervised.attribute.ReplaceMissingValues; |
---|
48 | |
---|
49 | import java.util.Enumeration; |
---|
50 | import java.util.Vector; |
---|
51 | |
---|
52 | /** |
---|
53 | <!-- globalinfo-start --> |
---|
54 | * Classifier for building 'logistic model trees', which are classification trees with logistic regression functions at the leaves. The algorithm can deal with binary and multi-class target variables, numeric and nominal attributes and missing values.<br/> |
---|
55 | * <br/> |
---|
56 | * For more information see: <br/> |
---|
57 | * <br/> |
---|
58 | * Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees. Machine Learning. 95(1-2):161-205.<br/> |
---|
59 | * <br/> |
---|
60 | * Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree Induction. In: 9th European Conference on Principles and Practice of Knowledge Discovery in Databases, 675-683, 2005. |
---|
61 | * <p/> |
---|
62 | <!-- globalinfo-end --> |
---|
63 | * |
---|
64 | <!-- technical-bibtex-start --> |
---|
65 | * BibTeX: |
---|
66 | * <pre> |
---|
67 | * @article{Landwehr2005, |
---|
68 | * author = {Niels Landwehr and Mark Hall and Eibe Frank}, |
---|
69 | * journal = {Machine Learning}, |
---|
70 | * number = {1-2}, |
---|
71 | * pages = {161-205}, |
---|
72 | * title = {Logistic Model Trees}, |
---|
73 | * volume = {95}, |
---|
74 | * year = {2005} |
---|
75 | * } |
---|
76 | * |
---|
77 | * @inproceedings{Sumner2005, |
---|
78 | * author = {Marc Sumner and Eibe Frank and Mark Hall}, |
---|
79 | * booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases}, |
---|
80 | * pages = {675-683}, |
---|
81 | * publisher = {Springer}, |
---|
82 | * title = {Speeding up Logistic Model Tree Induction}, |
---|
83 | * year = {2005} |
---|
84 | * } |
---|
85 | * </pre> |
---|
86 | * <p/> |
---|
87 | <!-- technical-bibtex-end --> |
---|
88 | * |
---|
89 | <!-- options-start --> |
---|
90 | * Valid options are: <p/> |
---|
91 | * |
---|
92 | * <pre> -B |
---|
93 | * Binary splits (convert nominal attributes to binary ones)</pre> |
---|
94 | * |
---|
95 | * <pre> -R |
---|
96 | * Split on residuals instead of class values</pre> |
---|
97 | * |
---|
98 | * <pre> -C |
---|
99 | * Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre> |
---|
100 | * |
---|
101 | * <pre> -P |
---|
102 | * Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre> |
---|
103 | * |
---|
104 | * <pre> -I <numIterations> |
---|
105 | * Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre> |
---|
106 | * |
---|
107 | * <pre> -M <numInstances> |
---|
108 | * Set minimum number of instances at which a node can be split (default 15)</pre> |
---|
109 | * |
---|
110 | * <pre> -W <beta> |
---|
111 | * Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre> |
---|
112 | * |
---|
113 | * <pre> -A |
---|
114 | * The AIC is used to choose the best iteration.</pre> |
---|
115 | * |
---|
116 | <!-- options-end --> |
---|
117 | * |
---|
118 | * @author Niels Landwehr |
---|
119 | * @author Marc Sumner |
---|
120 | * @version $Revision: 6088 $ |
---|
121 | */ |
---|
122 | public class LMT |
---|
123 | extends AbstractClassifier |
---|
124 | implements OptionHandler, AdditionalMeasureProducer, Drawable, |
---|
125 | TechnicalInformationHandler { |
---|
126 | |
---|
  /** For serialization. */
  static final long serialVersionUID = -1113212459618104943L;

  /** Filter to replace missing values; set up in buildClassifier() and reused at prediction time. */
  protected ReplaceMissingValues m_replaceMissing;

  /** Filter to convert nominal attributes to binary ones; only created when m_convertNominal is true. */
  protected NominalToBinary m_nominalToBinary;

  /** Root node of the logistic model tree (null until buildClassifier() has run). */
  protected LMTNode m_tree;

  /** Use heuristic that determines the number of LogitBoost iterations only once in the beginning? */
  protected boolean m_fastRegression;

  /** Convert nominal attributes to binary ones before building the tree? */
  protected boolean m_convertNominal;

  /** Split on LogitBoost residuals instead of class values? */
  protected boolean m_splitOnResiduals;

  /** Use error on probabilities instead of misclassification error for the LogitBoost stopping criterion? */
  protected boolean m_errorOnProbabilities;

  /** Minimum number of instances at which a node is considered for splitting (default 15). */
  protected int m_minNumInstances;

  /** Fixed number of LogitBoost iterations if >= 0; if negative the number is cross-validated. */
  protected int m_numBoostingIterations;

  /**
   * Threshold for trimming weights: instances with a weight lower than this
   * (as a percentage of total weights) are not included in the regression fit.
   * 0 (the default) means no weight trimming.
   */
  protected double m_weightTrimBeta;

  /** If true, the AIC is used to choose the best LogitBoost iteration. */
  private boolean m_useAIC = false;
---|
164 | |
---|
165 | /** |
---|
166 | * Creates an instance of LMT with standard options |
---|
167 | */ |
---|
168 | public LMT() { |
---|
169 | m_fastRegression = true; |
---|
170 | m_numBoostingIterations = -1; |
---|
171 | m_minNumInstances = 15; |
---|
172 | m_weightTrimBeta = 0; |
---|
173 | m_useAIC = false; |
---|
174 | } |
---|
175 | |
---|
176 | /** |
---|
177 | * Returns default capabilities of the classifier. |
---|
178 | * |
---|
179 | * @return the capabilities of this classifier |
---|
180 | */ |
---|
181 | public Capabilities getCapabilities() { |
---|
182 | Capabilities result = super.getCapabilities(); |
---|
183 | result.disableAll(); |
---|
184 | |
---|
185 | // attributes |
---|
186 | result.enable(Capability.NOMINAL_ATTRIBUTES); |
---|
187 | result.enable(Capability.NUMERIC_ATTRIBUTES); |
---|
188 | result.enable(Capability.DATE_ATTRIBUTES); |
---|
189 | result.enable(Capability.MISSING_VALUES); |
---|
190 | |
---|
191 | // class |
---|
192 | result.enable(Capability.NOMINAL_CLASS); |
---|
193 | result.enable(Capability.MISSING_CLASS_VALUES); |
---|
194 | |
---|
195 | return result; |
---|
196 | } |
---|
197 | |
---|
198 | /** |
---|
199 | * Builds the classifier. |
---|
200 | * |
---|
201 | * @param data the data to train with |
---|
202 | * @throws Exception if classifier can't be built successfully |
---|
203 | */ |
---|
204 | public void buildClassifier(Instances data) throws Exception{ |
---|
205 | |
---|
206 | // can classifier handle the data? |
---|
207 | getCapabilities().testWithFail(data); |
---|
208 | |
---|
209 | // remove instances with missing class |
---|
210 | Instances filteredData = new Instances(data); |
---|
211 | filteredData.deleteWithMissingClass(); |
---|
212 | |
---|
213 | //replace missing values |
---|
214 | m_replaceMissing = new ReplaceMissingValues(); |
---|
215 | m_replaceMissing.setInputFormat(filteredData); |
---|
216 | filteredData = Filter.useFilter(filteredData, m_replaceMissing); |
---|
217 | |
---|
218 | //possibly convert nominal attributes globally |
---|
219 | if (m_convertNominal) { |
---|
220 | m_nominalToBinary = new NominalToBinary(); |
---|
221 | m_nominalToBinary.setInputFormat(filteredData); |
---|
222 | filteredData = Filter.useFilter(filteredData, m_nominalToBinary); |
---|
223 | } |
---|
224 | |
---|
225 | int minNumInstances = 2; |
---|
226 | |
---|
227 | //create ModelSelection object, either for splits on the residuals or for splits on the class value |
---|
228 | ModelSelection modSelection; |
---|
229 | if (m_splitOnResiduals) { |
---|
230 | modSelection = new ResidualModelSelection(minNumInstances); |
---|
231 | } else { |
---|
232 | modSelection = new C45ModelSelection(minNumInstances, filteredData, true); |
---|
233 | } |
---|
234 | |
---|
235 | //create tree root |
---|
236 | m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression, |
---|
237 | m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC); |
---|
238 | //build tree |
---|
239 | m_tree.buildClassifier(filteredData); |
---|
240 | |
---|
241 | if (modSelection instanceof C45ModelSelection) ((C45ModelSelection)modSelection).cleanup(); |
---|
242 | } |
---|
243 | |
---|
244 | /** |
---|
245 | * Returns class probabilities for an instance. |
---|
246 | * |
---|
247 | * @param instance the instance to compute the distribution for |
---|
248 | * @return the class probabilities |
---|
249 | * @throws Exception if distribution can't be computed successfully |
---|
250 | */ |
---|
251 | public double [] distributionForInstance(Instance instance) throws Exception { |
---|
252 | |
---|
253 | //replace missing values |
---|
254 | m_replaceMissing.input(instance); |
---|
255 | instance = m_replaceMissing.output(); |
---|
256 | |
---|
257 | //possibly convert nominal attributes |
---|
258 | if (m_convertNominal) { |
---|
259 | m_nominalToBinary.input(instance); |
---|
260 | instance = m_nominalToBinary.output(); |
---|
261 | } |
---|
262 | |
---|
263 | return m_tree.distributionForInstance(instance); |
---|
264 | } |
---|
265 | |
---|
266 | /** |
---|
267 | * Classifies an instance. |
---|
268 | * |
---|
269 | * @param instance the instance to classify |
---|
270 | * @return the classification |
---|
271 | * @throws Exception if instance can't be classified successfully |
---|
272 | */ |
---|
273 | public double classifyInstance(Instance instance) throws Exception { |
---|
274 | |
---|
275 | double maxProb = -1; |
---|
276 | int maxIndex = 0; |
---|
277 | |
---|
278 | //classify by maximum probability |
---|
279 | double[] probs = distributionForInstance(instance); |
---|
280 | for (int j = 0; j < instance.numClasses(); j++) { |
---|
281 | if (Utils.gr(probs[j], maxProb)) { |
---|
282 | maxIndex = j; |
---|
283 | maxProb = probs[j]; |
---|
284 | } |
---|
285 | } |
---|
286 | return (double)maxIndex; |
---|
287 | } |
---|
288 | |
---|
289 | /** |
---|
290 | * Returns a description of the classifier. |
---|
291 | * |
---|
292 | * @return a string representation of the classifier |
---|
293 | */ |
---|
294 | public String toString() { |
---|
295 | if (m_tree!=null) { |
---|
296 | return "Logistic model tree \n------------------\n" + m_tree.toString(); |
---|
297 | } else { |
---|
298 | return "No tree build"; |
---|
299 | } |
---|
300 | } |
---|
301 | |
---|
302 | /** |
---|
303 | * Returns an enumeration describing the available options. |
---|
304 | * |
---|
305 | * @return an enumeration of all the available options. |
---|
306 | */ |
---|
307 | public Enumeration listOptions() { |
---|
308 | Vector newVector = new Vector(8); |
---|
309 | |
---|
310 | newVector.addElement(new Option("\tBinary splits (convert nominal attributes to binary ones)", |
---|
311 | "B", 0, "-B")); |
---|
312 | |
---|
313 | newVector.addElement(new Option("\tSplit on residuals instead of class values", |
---|
314 | "R", 0, "-R")); |
---|
315 | |
---|
316 | newVector.addElement(new Option("\tUse cross-validation for boosting at all nodes (i.e., disable heuristic)", |
---|
317 | "C", 0, "-C")); |
---|
318 | |
---|
319 | newVector.addElement(new Option("\tUse error on probabilities instead of misclassification error "+ |
---|
320 | "for stopping criterion of LogitBoost.", |
---|
321 | "P", 0, "-P")); |
---|
322 | |
---|
323 | newVector.addElement(new Option("\tSet fixed number of iterations for LogitBoost (instead of using "+ |
---|
324 | "cross-validation)", |
---|
325 | "I",1,"-I <numIterations>")); |
---|
326 | |
---|
327 | newVector.addElement(new Option("\tSet minimum number of instances at which a node can be split (default 15)", |
---|
328 | "M",1,"-M <numInstances>")); |
---|
329 | |
---|
330 | newVector.addElement(new Option("\tSet beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.", |
---|
331 | "W",1,"-W <beta>")); |
---|
332 | |
---|
333 | newVector.addElement(new Option("\tThe AIC is used to choose the best iteration.", |
---|
334 | "A", 0, "-A")); |
---|
335 | |
---|
336 | return newVector.elements(); |
---|
337 | } |
---|
338 | |
---|
339 | /** |
---|
340 | * Parses a given list of options. <p/> |
---|
341 | * |
---|
342 | <!-- options-start --> |
---|
343 | * Valid options are: <p/> |
---|
344 | * |
---|
345 | * <pre> -B |
---|
346 | * Binary splits (convert nominal attributes to binary ones)</pre> |
---|
347 | * |
---|
348 | * <pre> -R |
---|
349 | * Split on residuals instead of class values</pre> |
---|
350 | * |
---|
351 | * <pre> -C |
---|
352 | * Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre> |
---|
353 | * |
---|
354 | * <pre> -P |
---|
355 | * Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre> |
---|
356 | * |
---|
357 | * <pre> -I <numIterations> |
---|
358 | * Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre> |
---|
359 | * |
---|
360 | * <pre> -M <numInstances> |
---|
361 | * Set minimum number of instances at which a node can be split (default 15)</pre> |
---|
362 | * |
---|
363 | * <pre> -W <beta> |
---|
364 | * Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre> |
---|
365 | * |
---|
366 | * <pre> -A |
---|
367 | * The AIC is used to choose the best iteration.</pre> |
---|
368 | * |
---|
369 | <!-- options-end --> |
---|
370 | * |
---|
371 | * @param options the list of options as an array of strings |
---|
372 | * @throws Exception if an option is not supported |
---|
373 | */ |
---|
374 | public void setOptions(String[] options) throws Exception { |
---|
375 | |
---|
376 | setConvertNominal(Utils.getFlag('B', options)); |
---|
377 | setSplitOnResiduals(Utils.getFlag('R', options)); |
---|
378 | setFastRegression(!Utils.getFlag('C', options)); |
---|
379 | setErrorOnProbabilities(Utils.getFlag('P', options)); |
---|
380 | |
---|
381 | String optionString = Utils.getOption('I', options); |
---|
382 | if (optionString.length() != 0) { |
---|
383 | setNumBoostingIterations((new Integer(optionString)).intValue()); |
---|
384 | } |
---|
385 | |
---|
386 | optionString = Utils.getOption('M', options); |
---|
387 | if (optionString.length() != 0) { |
---|
388 | setMinNumInstances((new Integer(optionString)).intValue()); |
---|
389 | } |
---|
390 | |
---|
391 | optionString = Utils.getOption('W', options); |
---|
392 | if (optionString.length() != 0) { |
---|
393 | setWeightTrimBeta((new Double(optionString)).doubleValue()); |
---|
394 | } |
---|
395 | |
---|
396 | setUseAIC(Utils.getFlag('A', options)); |
---|
397 | |
---|
398 | Utils.checkForRemainingOptions(options); |
---|
399 | |
---|
400 | } |
---|
401 | |
---|
402 | /** |
---|
403 | * Gets the current settings of the Classifier. |
---|
404 | * |
---|
405 | * @return an array of strings suitable for passing to setOptions |
---|
406 | */ |
---|
407 | public String[] getOptions() { |
---|
408 | String[] options = new String[11]; |
---|
409 | int current = 0; |
---|
410 | |
---|
411 | if (getConvertNominal()) { |
---|
412 | options[current++] = "-B"; |
---|
413 | } |
---|
414 | |
---|
415 | if (getSplitOnResiduals()) { |
---|
416 | options[current++] = "-R"; |
---|
417 | } |
---|
418 | |
---|
419 | if (!getFastRegression()) { |
---|
420 | options[current++] = "-C"; |
---|
421 | } |
---|
422 | |
---|
423 | if (getErrorOnProbabilities()) { |
---|
424 | options[current++] = "-P"; |
---|
425 | } |
---|
426 | |
---|
427 | options[current++] = "-I"; |
---|
428 | options[current++] = ""+getNumBoostingIterations(); |
---|
429 | |
---|
430 | options[current++] = "-M"; |
---|
431 | options[current++] = ""+getMinNumInstances(); |
---|
432 | |
---|
433 | options[current++] = "-W"; |
---|
434 | options[current++] = ""+getWeightTrimBeta(); |
---|
435 | |
---|
436 | if (getUseAIC()) { |
---|
437 | options[current++] = "-A"; |
---|
438 | } |
---|
439 | |
---|
440 | while (current < options.length) { |
---|
441 | options[current++] = ""; |
---|
442 | } |
---|
443 | return options; |
---|
444 | } |
---|
445 | |
---|
  /**
   * Get the value of weightTrimBeta.
   *
   * @return the beta threshold used for weight trimming in LogitBoost (0 means no trimming)
   */
  public double getWeightTrimBeta(){
    return m_weightTrimBeta;
  }

  /**
   * Get the value of useAIC.
   *
   * @return true if the AIC is used to choose the best LogitBoost iteration
   */
  public boolean getUseAIC(){
    return m_useAIC;
  }

  /**
   * Set the value of weightTrimBeta.
   *
   * @param n the new beta threshold for weight trimming (0 disables trimming)
   */
  public void setWeightTrimBeta(double n){
    m_weightTrimBeta = n;
  }

  /**
   * Set the value of useAIC.
   *
   * @param c if true, the AIC is used to choose the best LogitBoost iteration
   */
  public void setUseAIC(boolean c){
    m_useAIC = c;
  }
---|
477 | |
---|
  /**
   * Get the value of convertNominal.
   *
   * @return true if nominal attributes are converted to binary ones before tree building
   */
  public boolean getConvertNominal(){
    return m_convertNominal;
  }

  /**
   * Get the value of splitOnResiduals.
   *
   * @return true if splitting is based on LogitBoost residuals instead of class values
   */
  public boolean getSplitOnResiduals(){
    return m_splitOnResiduals;
  }

  /**
   * Get the value of fastRegression.
   *
   * @return true if the number of LogitBoost iterations is cross-validated only once
   */
  public boolean getFastRegression(){
    return m_fastRegression;
  }

  /**
   * Get the value of errorOnProbabilities.
   *
   * @return true if error on probabilities is used as the LogitBoost stopping criterion
   */
  public boolean getErrorOnProbabilities(){
    return m_errorOnProbabilities;
  }

  /**
   * Get the value of numBoostingIterations.
   *
   * @return the fixed number of LogitBoost iterations, or a negative value if cross-validated
   */
  public int getNumBoostingIterations(){
    return m_numBoostingIterations;
  }

  /**
   * Get the value of minNumInstances.
   *
   * @return the minimum number of instances at which a node is considered for splitting
   */
  public int getMinNumInstances(){
    return m_minNumInstances;
  }
---|
531 | |
---|
  /**
   * Set the value of convertNominal.
   *
   * @param c if true, nominal attributes are converted to binary ones before tree building
   */
  public void setConvertNominal(boolean c){
    m_convertNominal = c;
  }

  /**
   * Set the value of splitOnResiduals.
   *
   * @param c if true, splitting is based on LogitBoost residuals instead of class values
   */
  public void setSplitOnResiduals(boolean c){
    m_splitOnResiduals = c;
  }

  /**
   * Set the value of fastRegression.
   *
   * @param c if true, the number of LogitBoost iterations is cross-validated only once
   */
  public void setFastRegression(boolean c){
    m_fastRegression = c;
  }

  /**
   * Set the value of errorOnProbabilities.
   *
   * @param c if true, error on probabilities is used as the LogitBoost stopping criterion
   */
  public void setErrorOnProbabilities(boolean c){
    m_errorOnProbabilities = c;
  }

  /**
   * Set the value of numBoostingIterations.
   *
   * @param c the fixed number of LogitBoost iterations; negative means cross-validated
   */
  public void setNumBoostingIterations(int c){
    m_numBoostingIterations = c;
  }

  /**
   * Set the value of minNumInstances.
   *
   * @param c the minimum number of instances at which a node is considered for splitting
   */
  public void setMinNumInstances(int c){
    m_minNumInstances = c;
  }
---|
585 | |
---|
  /**
   * Returns the type of graph this classifier represents
   * (a tree, drawable by Weka's tree visualizer).
   *
   * @return Drawable.TREE
   */
  public int graphType() {
    return Drawable.TREE;
  }
---|
594 | |
---|
  /**
   * Returns graph describing the tree. Delegates to the root node,
   * so the classifier must have been built first (m_tree non-null).
   *
   * @return the graph describing the tree
   * @throws Exception if graph can't be computed
   */
  public String graph() throws Exception {

    return m_tree.graph();
  }
---|
605 | |
---|
  /**
   * Returns the size of the tree (total number of nodes),
   * as an additional measure. Requires a built tree.
   *
   * @return the size of the tree
   */
  public int measureTreeSize(){
    return m_tree.numNodes();
  }

  /**
   * Returns the number of leaves in the tree,
   * as an additional measure. Requires a built tree.
   *
   * @return the number of leaves in the tree
   */
  public int measureNumLeaves(){
    return m_tree.numLeaves();
  }
---|
621 | |
---|
622 | /** |
---|
623 | * Returns an enumeration of the additional measure names |
---|
624 | * @return an enumeration of the measure names |
---|
625 | */ |
---|
626 | public Enumeration enumerateMeasures() { |
---|
627 | Vector newVector = new Vector(2); |
---|
628 | newVector.addElement("measureTreeSize"); |
---|
629 | newVector.addElement("measureNumLeaves"); |
---|
630 | |
---|
631 | return newVector.elements(); |
---|
632 | } |
---|
633 | |
---|
634 | |
---|
635 | /** |
---|
636 | * Returns the value of the named measure |
---|
637 | * @param additionalMeasureName the name of the measure to query for its value |
---|
638 | * @return the value of the named measure |
---|
639 | * @throws IllegalArgumentException if the named measure is not supported |
---|
640 | */ |
---|
641 | public double getMeasure(String additionalMeasureName) { |
---|
642 | if (additionalMeasureName.compareToIgnoreCase("measureTreeSize") == 0) { |
---|
643 | return measureTreeSize(); |
---|
644 | } else if (additionalMeasureName.compareToIgnoreCase("measureNumLeaves") == 0) { |
---|
645 | return measureNumLeaves(); |
---|
646 | } else { |
---|
647 | throw new IllegalArgumentException(additionalMeasureName |
---|
648 | + " not supported (LMT)"); |
---|
649 | } |
---|
650 | } |
---|
651 | |
---|
652 | /** |
---|
653 | * Returns a string describing classifier |
---|
654 | * @return a description suitable for |
---|
655 | * displaying in the explorer/experimenter gui |
---|
656 | */ |
---|
657 | public String globalInfo() { |
---|
658 | return "Classifier for building 'logistic model trees', which are classification trees with " |
---|
659 | +"logistic regression functions at the leaves. The algorithm can deal with binary and multi-class " |
---|
660 | +"target variables, numeric and nominal attributes and missing values.\n\n" |
---|
661 | +"For more information see: \n\n" |
---|
662 | + getTechnicalInformation().toString(); |
---|
663 | } |
---|
664 | |
---|
665 | /** |
---|
666 | * Returns an instance of a TechnicalInformation object, containing |
---|
667 | * detailed information about the technical background of this class, |
---|
668 | * e.g., paper reference or book this class is based on. |
---|
669 | * |
---|
670 | * @return the technical information about this class |
---|
671 | */ |
---|
672 | public TechnicalInformation getTechnicalInformation() { |
---|
673 | TechnicalInformation result; |
---|
674 | TechnicalInformation additional; |
---|
675 | |
---|
676 | result = new TechnicalInformation(Type.ARTICLE); |
---|
677 | result.setValue(Field.AUTHOR, "Niels Landwehr and Mark Hall and Eibe Frank"); |
---|
678 | result.setValue(Field.TITLE, "Logistic Model Trees"); |
---|
679 | result.setValue(Field.JOURNAL, "Machine Learning"); |
---|
680 | result.setValue(Field.YEAR, "2005"); |
---|
681 | result.setValue(Field.VOLUME, "95"); |
---|
682 | result.setValue(Field.PAGES, "161-205"); |
---|
683 | result.setValue(Field.NUMBER, "1-2"); |
---|
684 | |
---|
685 | additional = result.add(Type.INPROCEEDINGS); |
---|
686 | additional.setValue(Field.AUTHOR, "Marc Sumner and Eibe Frank and Mark Hall"); |
---|
687 | additional.setValue(Field.TITLE, "Speeding up Logistic Model Tree Induction"); |
---|
688 | additional.setValue(Field.BOOKTITLE, "9th European Conference on Principles and Practice of Knowledge Discovery in Databases"); |
---|
689 | additional.setValue(Field.YEAR, "2005"); |
---|
690 | additional.setValue(Field.PAGES, "675-683"); |
---|
691 | additional.setValue(Field.PUBLISHER, "Springer"); |
---|
692 | |
---|
693 | return result; |
---|
694 | } |
---|
695 | |
---|
  /**
   * Returns the tip text for the convertNominal property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String convertNominalTipText() {
    return "Convert all nominal attributes to binary ones before building the tree. "
      +"This means that all splits in the final tree will be binary.";
  }

  /**
   * Returns the tip text for the splitOnResiduals property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String splitOnResidualsTipText() {
    // NOTE(review): "residuals produces" in the text below looks like a typo
    // for "residuals produced"; left unchanged since it is a runtime string.
    return "Set splitting criterion based on the residuals of LogitBoost. "
      +"There are two possible splitting criteria for LMT: the default is to use the C4.5 "
      +"splitting criterion that uses information gain on the class variable. The other splitting "
      +"criterion tries to improve the purity in the residuals produces when fitting the logistic "
      +"regression functions. The choice of the splitting criterion does not usually affect classification "
      +"accuracy much, but can produce different trees.";
  }

  /**
   * Returns the tip text for the fastRegression property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String fastRegressionTipText() {
    return "Use heuristic that avoids cross-validating the number of Logit-Boost iterations at every node. "
      +"When fitting the logistic regression functions at a node, LMT has to determine the number of LogitBoost "
      +"iterations to run. Originally, this number was cross-validated at every node in the tree. "
      +"To save time, this heuristic cross-validates the number only once and then uses that number at every "
      +"node in the tree. Usually this does not decrease accuracy but improves runtime considerably.";
  }


  /**
   * Returns the tip text for the errorOnProbabilities property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String errorOnProbabilitiesTipText() {
    return "Minimize error on probabilities instead of misclassification error when cross-validating the number "
      +"of LogitBoost iterations. When set, the number of LogitBoost iterations is chosen that minimizes "
      +"the root mean squared error instead of the misclassification error.";
  }

  /**
   * Returns the tip text for the numBoostingIterations property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numBoostingIterationsTipText() {
    return "Set a fixed number of iterations for LogitBoost. If >= 0, this sets a fixed number of LogitBoost "
      +"iterations that is used everywhere in the tree. If < 0, the number is cross-validated.";
  }

  /**
   * Returns the tip text for the minNumInstances property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNumInstancesTipText() {
    return "Set the minimum number of instances at which a node is considered for splitting. "
      +"The default value is 15.";
  }

  /**
   * Returns the tip text for the weightTrimBeta property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String weightTrimBetaTipText() {
    return "Set the beta value used for weight trimming in LogitBoost. "
      +"Only instances carrying (1 - beta)% of the weight from previous iteration "
      +"are used in the next iteration. Set to 0 for no weight trimming. "
      +"The default value is 0.";
  }

  /**
   * Returns the tip text for the useAIC property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String useAICTipText() {
    return "The AIC is used to determine when to stop LogitBoost iterations. "
      +"The default is not to use AIC.";
  }
---|
786 | |
---|
  /**
   * Returns the revision string, extracted from the CVS/SVN
   * revision keyword below.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 6088 $");
  }
---|
795 | |
---|
  /**
   * Main method for testing this class: runs the classifier from the
   * command line via the standard Weka runner.
   *
   * @param argv the commandline options
   */
  public static void main (String [] argv) {
    runClassifier(new LMT(), argv);
  }
---|
804 | } |
---|