| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * GainRatioSplitCrit.java |
|---|
| 19 | * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand |
|---|
| 20 | * |
|---|
| 21 | */ |
|---|
| 22 | |
|---|
| 23 | package weka.classifiers.trees.j48; |
|---|
| 24 | |
|---|
| 25 | import weka.core.RevisionUtils; |
|---|
| 26 | import weka.core.Utils; |
|---|
| 27 | |
|---|
| 28 | /** |
|---|
| 29 | * Class for computing the gain ratio for a given distribution. |
|---|
| 30 | * |
|---|
| 31 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) |
|---|
| 32 | * @version $Revision: 1.8 $ |
|---|
| 33 | */ |
|---|
| 34 | public final class GainRatioSplitCrit |
|---|
| 35 | extends EntropyBasedSplitCrit{ |
|---|
| 36 | |
|---|
| 37 | /** for serialization */ |
|---|
| 38 | private static final long serialVersionUID = -433336694718670930L; |
|---|
| 39 | |
|---|
| 40 | /** |
|---|
| 41 | * This method is a straightforward implementation of the gain |
|---|
| 42 | * ratio criterion for the given distribution. |
|---|
| 43 | */ |
|---|
| 44 | public final double splitCritValue(Distribution bags) { |
|---|
| 45 | |
|---|
| 46 | double numerator; |
|---|
| 47 | double denumerator; |
|---|
| 48 | |
|---|
| 49 | numerator = oldEnt(bags)-newEnt(bags); |
|---|
| 50 | |
|---|
| 51 | // Splits with no gain are useless. |
|---|
| 52 | if (Utils.eq(numerator,0)) |
|---|
| 53 | return Double.MAX_VALUE; |
|---|
| 54 | denumerator = splitEnt(bags); |
|---|
| 55 | |
|---|
| 56 | // Test if split is trivial. |
|---|
| 57 | if (Utils.eq(denumerator,0)) |
|---|
| 58 | return Double.MAX_VALUE; |
|---|
| 59 | |
|---|
| 60 | // We take the reciprocal value because we want to minimize the |
|---|
| 61 | // splitting criterion's value. |
|---|
| 62 | return denumerator/numerator; |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | /** |
|---|
| 66 | * This method computes the gain ratio in the same way C4.5 does. |
|---|
| 67 | * |
|---|
| 68 | * @param bags the distribution |
|---|
| 69 | * @param totalnoInst the weight of ALL instances |
|---|
| 70 | * @param numerator the info gain |
|---|
| 71 | */ |
|---|
| 72 | public final double splitCritValue(Distribution bags, double totalnoInst, |
|---|
| 73 | double numerator){ |
|---|
| 74 | |
|---|
| 75 | double denumerator; |
|---|
| 76 | double noUnknown; |
|---|
| 77 | double unknownRate; |
|---|
| 78 | int i; |
|---|
| 79 | |
|---|
| 80 | // Compute split info. |
|---|
| 81 | denumerator = splitEnt(bags,totalnoInst); |
|---|
| 82 | |
|---|
| 83 | // Test if split is trivial. |
|---|
| 84 | if (Utils.eq(denumerator,0)) |
|---|
| 85 | return 0; |
|---|
| 86 | denumerator = denumerator/totalnoInst; |
|---|
| 87 | |
|---|
| 88 | return numerator/denumerator; |
|---|
| 89 | } |
|---|
| 90 | |
|---|
| 91 | /** |
|---|
| 92 | * Help method for computing the split entropy. |
|---|
| 93 | */ |
|---|
| 94 | private final double splitEnt(Distribution bags,double totalnoInst){ |
|---|
| 95 | |
|---|
| 96 | double returnValue = 0; |
|---|
| 97 | double noUnknown; |
|---|
| 98 | int i; |
|---|
| 99 | |
|---|
| 100 | noUnknown = totalnoInst-bags.total(); |
|---|
| 101 | if (Utils.gr(bags.total(),0)){ |
|---|
| 102 | for (i=0;i<bags.numBags();i++) |
|---|
| 103 | returnValue = returnValue-logFunc(bags.perBag(i)); |
|---|
| 104 | returnValue = returnValue-logFunc(noUnknown); |
|---|
| 105 | returnValue = returnValue+logFunc(totalnoInst); |
|---|
| 106 | } |
|---|
| 107 | return returnValue; |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | /** |
|---|
| 111 | * Returns the revision string. |
|---|
| 112 | * |
|---|
| 113 | * @return the revision |
|---|
| 114 | */ |
|---|
| 115 | public String getRevision() { |
|---|
| 116 | return RevisionUtils.extract("$Revision: 1.8 $"); |
|---|
| 117 | } |
|---|
| 118 | } |
|---|