source: src/main/java/weka/classifiers/trees/j48/GainRatioSplitCrit.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 3.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    GainRatioSplitCrit.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.trees.j48;
24
25import weka.core.RevisionUtils;
26import weka.core.Utils;
27
28/**
29 * Class for computing the gain ratio for a given distribution.
30 *
31 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
32 * @version $Revision: 1.8 $
33 */
34public final class GainRatioSplitCrit
35  extends EntropyBasedSplitCrit{
36
37  /** for serialization */
38  private static final long serialVersionUID = -433336694718670930L;
39
40  /**
41   * This method is a straightforward implementation of the gain
42   * ratio criterion for the given distribution.
43   */
44  public final double splitCritValue(Distribution bags) {
45
46    double numerator;
47    double denumerator;
48   
49    numerator = oldEnt(bags)-newEnt(bags);
50
51    // Splits with no gain are useless.
52    if (Utils.eq(numerator,0))
53      return Double.MAX_VALUE;
54    denumerator = splitEnt(bags);
55   
56    // Test if split is trivial.
57    if (Utils.eq(denumerator,0))
58      return Double.MAX_VALUE;
59   
60    //  We take the reciprocal value because we want to minimize the
61    // splitting criterion's value.
62    return denumerator/numerator;
63  }
64
65  /**
66   * This method computes the gain ratio in the same way C4.5 does.
67   *
68   * @param bags the distribution
69   * @param totalnoInst the weight of ALL instances
70   * @param numerator the info gain
71   */
72  public final double splitCritValue(Distribution bags, double totalnoInst,
73                                     double numerator){
74   
75    double denumerator;
76    double noUnknown;
77    double unknownRate;
78    int i;
79   
80    // Compute split info.
81    denumerator = splitEnt(bags,totalnoInst);
82       
83    // Test if split is trivial.
84    if (Utils.eq(denumerator,0))
85      return 0; 
86    denumerator = denumerator/totalnoInst;
87
88    return numerator/denumerator;
89  }
90 
91  /**
92   * Help method for computing the split entropy.
93   */
94  private final double splitEnt(Distribution bags,double totalnoInst){
95   
96    double returnValue = 0;
97    double noUnknown;
98    int i;
99   
100    noUnknown = totalnoInst-bags.total();
101    if (Utils.gr(bags.total(),0)){
102      for (i=0;i<bags.numBags();i++)
103        returnValue = returnValue-logFunc(bags.perBag(i));
104      returnValue = returnValue-logFunc(noUnknown);
105      returnValue = returnValue+logFunc(totalnoInst);
106    }
107    return returnValue;
108  }
109 
110  /**
111   * Returns the revision string.
112   *
113   * @return            the revision
114   */
115  public String getRevision() {
116    return RevisionUtils.extract("$Revision: 1.8 $");
117  }
118}
Note: See TracBrowser for help on using the repository browser.