source: src/main/java/weka/classifiers/misc/monotone/CumulativeDiscreteDistribution.java @ 10

Last change on this file since 10 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 7.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    CumulativeDiscreteDistribution.java
19 *    Copyright (C) 2004 Stijn Lievens
20 *
21 */
22
23package weka.classifiers.misc.monotone;
24
25import weka.core.RevisionHandler;
26import weka.core.RevisionUtils;
27import weka.core.Utils;
28import weka.estimators.DiscreteEstimator;
29
30import java.io.Serializable;
31
32/**
33 * Represents a discrete cumulative probability distribution
34 * over a totally ordered discrete set.  The elements of this set
35 * are numbered consecutively starting from 0.
36 *<p>
37 * In this implementation object of type
38 * <code> CumulativeDiscreteDistribution </code> are immutable.
39 * </p>
40 * <p>
41 * This implementation is part of the master's thesis: "Studie
42 * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd
43 * rangschikken", Stijn Lievens, Ghent University, 2004.
44 * </p>
45 *
46 * @author Stijn Lievens (stijn.lievens@ugent.be)
47 * @version $Revision: 5922 $
48 */
49public class CumulativeDiscreteDistribution
50  implements Serializable, RevisionHandler {
51
52  /** for serialization */
53  private static final long serialVersionUID = -2959806903004453176L;
54
55  /**
56   * small tolerance to account for rounding errors when working
57   * with doubles
58   */
59  private static final double TOLERANCE = Utils.SMALL;
60
61  /** The cumulative probabilities */
62  private double[] m_cdf;
63
64  /**
65   * Create a discrete cumulative probability distribution based on a
66   * <code> DiscreteEstimator. </code>
67   *
68   * @param e the <code> DiscreteEstimator </code> on which the
69   * cumulative probability distribution will be based
70   */
71  public CumulativeDiscreteDistribution(DiscreteEstimator e) {
72    m_cdf = new double[e.getNumSymbols()];
73
74    if (m_cdf.length != 0) {
75      m_cdf[0] = e.getProbability(0);
76    }
77    for (int i = 1; i < m_cdf.length; i++) {
78      m_cdf[i] = m_cdf[i - 1] + e.getProbability(i);
79    }
80  }
81
82  /**
83   * Create a <code> CumulativeDiscreteDistribution </code> based on a
84   * <code> DiscreteDistribution. </code>
85   *
86   * @param d the <code> DiscreteDistribution </code> on which the
87   * cumulative probability distribution will be based
88   */
89  public CumulativeDiscreteDistribution(DiscreteDistribution d) {
90    m_cdf = new double[d.getNumSymbols()];
91
92    if (m_cdf.length != 0) {
93      m_cdf[0] = d.getProbability(0);
94    }
95    for (int i = 1; i < m_cdf.length; i++) {
96      m_cdf[i] = m_cdf[i - 1] + d.getProbability(i);
97    }
98  }
99
100  /**
101   * Create a <code> CumulativeDiscreteDistribution </code> based on an
102   * array of doubles.  The array <code> cdf </code> is copied, so
103   * the caller can reuse the same array.
104   *
105   * @param cdf an array that represents a valid discrete cumulative
106   * probability distribution
107   * @throws IllegalArgumentException if the array doesn't
108   * represent a valid cumulative discrete distribution function
109   */
110  public CumulativeDiscreteDistribution(double[] cdf) throws IllegalArgumentException {
111    if (!validCumulativeDistribution(cdf)) {
112      throw new IllegalArgumentException
113      ("Not a cumulative probability distribution");
114    }
115    m_cdf = new double[cdf.length];
116    System.arraycopy(cdf, 0, m_cdf, 0, cdf.length);
117  }
118
119  /**
120   * Get the number of elements over which the cumulative
121   * probability distribution is defined.
122   *
123   * @return the number of elements over which the cumulative
124   * probability distribution is defined.
125   */
126  public int getNumSymbols() {
127    return  (m_cdf != null) ? m_cdf.length : 0; 
128  }
129
130  /**
131   * Get the probability of finding an element
132   * smaller or equal than <code> index. </code>
133   *
134   * @param index the required index
135   * @return the probability of finding an element &lt;= index
136   */
137  public double getCumulativeProbability(int index) {
138    return m_cdf[index];
139  }
140
141  /**
142   * Get an array representation of the cumulative probability
143   * distribution.
144   *
145   * @return an array of doubles representing the cumulative
146   * probability distribution
147   */
148  public double[] toArray() {
149    double cdf[] = new double[m_cdf.length];
150    System.arraycopy(m_cdf, 0, cdf, 0, cdf.length);
151    return cdf;
152  }
153
154  /**
155   * Returns if <code> this </code> is dominated by <code> cdf. </code>
156   * This means that we check if, for all indices <code> i </code>, it
157   * holds that <code> this.getProbability(i) &gt;= cdf.getProbability(i).
158   * </code>
159   *
160   * @param cdf the <code> CumulativeDiscreteDistribution </code>
161   * <code> this </code> is compared to
162   * @return <code> true </code> if <code> this </code> is dominated by
163   * <code> cdf </code>, <code> false </code> otherwise
164   * @throws IllegalArgumentException if the two distributions don't
165   * have the same length
166   */
167  public boolean stochasticDominatedBy(CumulativeDiscreteDistribution cdf) throws IllegalArgumentException {
168    if (getNumSymbols() != cdf.getNumSymbols()) {
169      throw new IllegalArgumentException
170      ("Cumulative distributions are not defined over"
171          + " the same number of symbols");
172    }
173    for (int i = 0; i < m_cdf.length; i++) {
174      if (m_cdf[i] < cdf.m_cdf[i]) {
175        return false;
176      }
177    }
178    return true;
179  }
180
181  /**
182   * Indicates if the object <code> o </code> equals <code> this. </code>
183   * Note: for practical reasons I was forced to use a small tolerance
184   * whilst comparing the distributions, meaning that the transitivity
185   * property of <code> equals </code> is not guaranteed.
186   *
187   * @param o the reference object with which to compare
188   * @return <code> true </code> if <code> o </code> equals <code>
189   * this, </code> <code> false </code> otherwise
190   */
191  public boolean equals(Object o) {
192    if (!(o instanceof CumulativeDiscreteDistribution)) {
193      return false;
194    }
195    CumulativeDiscreteDistribution cdf = 
196      (CumulativeDiscreteDistribution) o;
197    if (m_cdf.length != cdf.getNumSymbols()) {
198      return false;
199    }
200    for (int i = 0; i < m_cdf.length; i++) {
201      if (Math.abs(m_cdf[i] - cdf.m_cdf[i]) > TOLERANCE) {
202        return false;
203      }
204    }
205    return true;
206  }
207
208  /**
209   * Get a string representation of the cumulative probability
210   * distribution.
211   *
212   * @return a string representation of the distribution.
213   */
214  public String toString() {
215    // XXX MAYBE WE SHOULD USE STRINGBUFFER AND USE A FIXED
216    // NUMBER OF DECIMALS BEHIND THE COMMA
217    String s = "[" + getNumSymbols() + "]:";
218    for (int i = 0; i < getNumSymbols(); i++)
219      s += " " + getCumulativeProbability(i);
220    return s;
221  }
222
223  /**
224   * Checks if the given array represents a valid cumulative
225   * distribution.
226   *
227   * @param cdf an array holding the presumed cumulative distribution
228   * @return <code> true </code> if <code> cdf </code> represents
229   * a valid cumulative discrete distribution function, <code> false
230   * </code> otherwise
231   */
232  private static boolean validCumulativeDistribution(double[] cdf) {
233    if (cdf == null || cdf.length == 0 || 
234        Math.abs(cdf[cdf.length - 1] - 1.) > TOLERANCE || cdf[0] < 0) {
235      return false;
236    }
237    for (int i = 1; i < cdf.length; i++) {
238
239      // allow small tolerance for increasing check
240      if (cdf[i] < cdf[i - 1] - TOLERANCE) {   
241        return false;
242      }
243    }
244    return true;
245  }
246 
247  /**
248   * Returns the revision string.
249   *
250   * @return            the revision
251   */
252  public String getRevision() {
253    return RevisionUtils.extract("$Revision: 5922 $");
254  }
255}
Note: See TracBrowser for help on using the repository browser.