source: src/main/java/weka/classifiers/misc/monotone/DiscreteDistribution.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 8.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    DiscreteDistribution.java
19 *    Copyright (C) 2004 Stijn Lievens
20 *
21 */
22
23package weka.classifiers.misc.monotone;
24
25import weka.core.RevisionHandler;
26import weka.core.RevisionUtils;
27import weka.core.Utils;
28import weka.estimators.DiscreteEstimator;
29
30import java.io.Serializable;
31
32/**
33 * This class represents a discrete probability distribution
34 * over a finite number of values.
35 * <p>
36 * In the present implementation, objects of type
37 * <code> DiscreteDistribution </code> are in fact immutable,
38 * so all one can do is create objects and retrieve information,
39 * such as median and mean, from them.
40 * </p>
41 * <p>
42 * This implementation is part of the master's thesis: "Studie
43 * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd
44 * rangschikken", Stijn Lievens, Ghent University, 2004.
45 * </p>
46 *
47 * @author Stijn Lievens (stijn.lievens@ugent.be)
48 * @version $Revision: 5922 $
49 */
50public class DiscreteDistribution
51  implements Serializable, RevisionHandler {
52
53  /** for serialization. */
54  private static final long serialVersionUID = 1954630934425689828L;
55
56  /**
57   * small tolerance to account for rounding errors when working
58   * with doubles
59   */
60  private static final double TOLERANCE=Utils.SMALL;
61
62  /** the array of probabilities */
63  private double[] m_dd;
64
65  /**
66   * Create a <code> DiscreteDistribution </code> based on a
67   * <code> DiscreteEstimator. </code>
68   *
69   * @param e the <code> DiscreteEstimator </code>
70   */
71  public DiscreteDistribution(DiscreteEstimator e) {
72    m_dd = new double[e.getNumSymbols()];
73
74    for (int i = 0; i < m_dd.length; i++) {
75      m_dd[i] = e.getProbability(i);
76    }
77  }
78
79  /**
80   * Create a <code> DiscreteDistribution </code> based on a
81   * <code> CumulativeDiscreteDistribution. </code>
82   *
83   * @param cdf the <code> CumulativeDiscreteDistribution </code>
84   */
85  public DiscreteDistribution(CumulativeDiscreteDistribution cdf) {
86    m_dd = new double[cdf.getNumSymbols()];
87
88    if (m_dd.length != 0) {
89      m_dd[0] = cdf.getCumulativeProbability(0);
90    }
91    for (int i = 1; i < m_dd.length; i++) {
92      m_dd[i] = cdf.getCumulativeProbability(i) 
93      - cdf.getCumulativeProbability(i - 1);
94    }
95  }
96
97  /**
98   * Create a <code> DiscreteDistribution </code> based on an
99   * array of doubles.
100   *
101   * @param dd the array of doubles representing a valid
102   * discrete distribution
103   * @throws IllegalArgumentException if <code> dd </code>
104   * does not represent a valid discrete distribution
105   */
106  public DiscreteDistribution(double[] dd) throws IllegalArgumentException {
107    if (!validDiscreteDistribution(dd)) {
108      throw new IllegalArgumentException
109      ("Not a valid discrete distribution");
110    }
111
112    m_dd = new double[dd.length];
113    System.arraycopy(dd,0,m_dd,0,dd.length);
114  }
115
116  /**
117   * Get the number of elements over which the <code>
118   * DiscreteDistribution </code> is defined.
119   *
120   * @return the number of elements over which the <code>
121   * DiscreteDistribution </code> is defined
122   */
123  public int getNumSymbols() {
124    return  (m_dd != null) ? m_dd.length : 0; 
125  }
126
127  /**
128   * Get the probability of finding the element at
129   * a specified index.
130   *
131   * @param index the index of the required element
132   * @return the probability of finding the specified element
133   */
134  public double getProbability(int index) {
135    return m_dd[index];
136  }
137
138  /**
139   * Calculate the mean of the distribution.  The scores for
140   * calculating the mean start from 0 and have step size one,
141   * i.e. if there are n elements then the scores are 0,1,...,n-1.
142   *
143   * @return the mean of the distribution
144   */
145  public double mean() {
146    double mean = 0;
147    for (int i = 1; i < m_dd.length; i++) {
148      mean += i * m_dd[i];
149    }
150    return mean;
151  }
152
153  /**
154   * Calculate the median of the distribution.  This means
155   * the following: if there is a label m such that
156   * P(x &lt;= m) &gt;= &frac12; and
157   * P(x &gt;= m) &gt;= &frac12;  then this label is returned.
158   * If there is no such label, an interpolation between the
159   * smallest label satisfying the first condition and the
160   * largest label satisfying the second condition is performed.
161   * The returned value is thus either an element label, or
162   * exactly between two element labels.
163   *
164   * @return the median of the distribution.
165   **/
166  public double median() {
167
168    /* cumulative probabilities starting from the left and
169     * right respectively
170     */
171    double cl=m_dd[0];
172    double cr=m_dd[m_dd.length - 1]; // cumulative left and right
173
174    int i = 0;
175    while(cl < 0.5) {
176      cl += m_dd[++i]; // pre-increment
177    }
178
179    int j = m_dd.length - 1;
180    while(cr < 0.5) {
181      cr += m_dd[--j]; // pre-increment
182    }
183
184    return i == j ? i : ( (double) (i + j) ) / 2;
185  }
186
187  /**
188   * Get a sorted array containing the indices of the elements with
189   * maximal probability.
190   *
191   * @return an array of class indices with maximal probability.
192   */
193  public int[] modes() {
194    int[] mm = new int[m_dd.length];
195    double max = m_dd[0];
196    int nr = 1; // number of relevant elements in mm
197    for (int i = 1; i < m_dd.length; i++) {
198      if (m_dd[i] > max + TOLERANCE) { 
199
200        // new maximum
201        max = m_dd[i];
202        mm[0] = i;
203        nr = 1;
204      }
205      else if (Math.abs(m_dd[i] - max) < TOLERANCE) {
206        mm[nr++] = i;
207      }
208    }
209
210    // trim to correct size
211    int[] modes = new int[nr];
212    System.arraycopy(mm, 0, modes, 0, nr);
213    return modes;
214  }
215
216  /**
217   * Convert the <code> DiscreteDistribution </code> to an
218   * array of doubles.
219   *
220   * @return an array of doubles representing the
221   * <code> DiscreteDistribution </code>
222   */
223  public double[] toArray() {
224    double[] dd = new double[m_dd.length];
225    System.arraycopy(m_dd, 0, dd, 0, dd.length);
226    return dd;
227  }
228
229  /**
230   * Get a string representation of the given <code>
231   * DiscreteDistribution. </code>
232   *
233   * @return a string representation of this object
234   */
235  public String toString() {
236
237    // XXX MAYBE WE SHOULD USE STRINGBUFFER AND FIXED WIDTH ...
238    String s = "[" + getNumSymbols() + "]:";
239    for (int i = 0; i < getNumSymbols(); i++) {
240      s += " " + getProbability(i);
241    }
242    return s;
243  }
244
245  /**
246   * Checks if <code> this </code> is dominated by <code> dd. </code>
247
248   * @param dd the DiscreteDistribution to compare to
249   * @return <code> true </code> if <code> this </code> is dominated by
250   * <code> dd </code>, <code> false </code> otherwise
251   * @throws IllegalArgumentException if the two distributions don't
252   * have the same length
253   */
254  public boolean stochasticDominatedBy(DiscreteDistribution dd) throws IllegalArgumentException {
255    return (new CumulativeDiscreteDistribution(this)).
256    stochasticDominatedBy
257    (new CumulativeDiscreteDistribution(dd));
258  }
259
260  /**
261   * Checks if the given array of doubles represents a valid discrete
262   * distribution.
263   *
264   * @param dd an array holding the doubles
265   * @return <code> true </code> if <code> dd </code> is a valid discrete distribution,
266   * <code> false </code> otherwise
267   */
268  private static boolean validDiscreteDistribution(double[] dd) {
269    if (dd == null || dd.length == 0) {
270      return false;
271    }
272
273    double sum = 0;
274    for (int i = 0; i < dd.length; i++) {
275      if (dd[i] < 0) {
276        return false;
277      }
278      sum += dd[i];
279    }
280    return !(Math.abs(sum - 1) > TOLERANCE); 
281  }
282 
283  /**
284   * Returns the revision string.
285   *
286   * @return            the revision
287   */
288  public String getRevision() {
289    return RevisionUtils.extract("$Revision: 5922 $");
290  }
291}
Note: See TracBrowser for help on using the repository browser.