source: src/main/java/weka/classifiers/trees/j48/InfoGainSplitCrit.java @ 14

Last change on this file since 14 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 3.3 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    InfoGainSplitCrit.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.trees.j48;
24
25import weka.core.RevisionUtils;
26import weka.core.Utils;
27
28/**
29 * Class for computing the information gain for a given distribution.
30 *
31 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
32 * @version $Revision: 1.10 $
33 */
34public final class InfoGainSplitCrit
35  extends EntropyBasedSplitCrit{
36
37  /** for serialization */
38  private static final long serialVersionUID = 4892105020180728499L;
39
40  /**
41   * This method is a straightforward implementation of the information
42   * gain criterion for the given distribution.
43   */
44  public final double splitCritValue(Distribution bags) {
45
46    double numerator;
47       
48    numerator = oldEnt(bags)-newEnt(bags);
49
50    // Splits with no gain are useless.
51    if (Utils.eq(numerator,0))
52      return Double.MAX_VALUE;
53       
54    // We take the reciprocal value because we want to minimize the
55    // splitting criterion's value.
56    return bags.total()/numerator;
57  }
58
59  /**
60   * This method computes the information gain in the same way
61   * C4.5 does.
62   *
63   * @param bags the distribution
64   * @param totalNoInst weight of ALL instances (including the
65   * ones with missing values).
66   */
67  public final double splitCritValue(Distribution bags, double totalNoInst) {
68   
69    double numerator;
70    double noUnknown;
71    double unknownRate;
72    int i;
73   
74    noUnknown = totalNoInst-bags.total();
75    unknownRate = noUnknown/totalNoInst;
76    numerator = (oldEnt(bags)-newEnt(bags));
77    numerator = (1-unknownRate)*numerator;
78   
79    // Splits with no gain are useless.
80    if (Utils.eq(numerator,0))
81      return 0;
82   
83    return numerator/bags.total();
84  }
85
86  /**
87   * This method computes the information gain in the same way
88   * C4.5 does.
89   *
90   * @param bags the distribution
91   * @param totalNoInst weight of ALL instances
92   * @param oldEnt entropy with respect to "no-split"-model.
93   */
94  public final double splitCritValue(Distribution bags,double totalNoInst,
95                                     double oldEnt) {
96   
97    double numerator;
98    double noUnknown;
99    double unknownRate;
100    int i;
101   
102    noUnknown = totalNoInst-bags.total();
103    unknownRate = noUnknown/totalNoInst;
104    numerator = (oldEnt-newEnt(bags));
105    numerator = (1-unknownRate)*numerator;
106   
107    // Splits with no gain are useless.
108    if (Utils.eq(numerator,0))
109      return 0;
110   
111    return numerator/bags.total();
112  }
113 
114  /**
115   * Returns the revision string.
116   *
117   * @return            the revision
118   */
119  public String getRevision() {
120    return RevisionUtils.extract("$Revision: 1.10 $");
121  }
122}
Note: See TracBrowser for help on using the repository browser.