source: src/main/java/weka/classifiers/misc/monotone/DistributionUtils.java @ 18

Last change on this file since 18 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 12.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    DistributionUtils.java
19 *    Copyright (C) 2004 Stijn Lievens
20 *
21 */
22
23package weka.classifiers.misc.monotone;
24
25import weka.core.RevisionHandler;
26import weka.core.RevisionUtils;
27import weka.estimators.DiscreteEstimator;
28
29import java.util.Arrays;
30
31/**
32 * Class with some simple methods acting on
33 * <code> CumulativeDiscreteDistribution. </code>
34 * All of the methods in this class are very easily implemented
35 * and the main use of this class is to gather all these methods
36 * in a single place.  It could be argued that some of the methods
37 * should be implemented in the class
38 * <code> CumulativeDiscreteDistribution </code> itself.
39 * <p>
40 * This implementation is part of the master's thesis: "Studie
41 * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd
42 * rangschikken", Stijn Lievens, Ghent University, 2004.
43 * </p>
44 *
45 * @author Stijn Lievens (stijn.lievens@ugent.be)
46 * @version $Revision: 5922 $
47 */
48public class DistributionUtils
49  implements RevisionHandler {
50
51  /**
52   * Constant indicating the maximal number of classes
53   * for which there is a minimal and maximal distribution
54   * present in the pool.
55   * One of the purposes of this class is to serve as a factory
56   * for minimal and maximal cumulative probability distributions.
57   * Since instances of <code> CumulativeDiscreteDistribution </code>
58   * are immutable, we can create them beforehand and reuse them
59   * every time one is needed. 
60   */
61  private static final int MAX_CLASSES = 20;
62
63  /**
64   * Array filled with minimal cumulative discrete probability
65   * distributions.  This means that probability one is given to the
66   * first element.  This array serves as a pool for the method
67   * <code> getMinimalCumulativeDiscreteDistribution. </code>
68   */
69  private static final CumulativeDiscreteDistribution[] m_minimalDistributions;
70
71  /**
72   * Array filled with maximal cumulative discrete probability
73   * distributions. This means that probability one is given to the
74   * largest element.  This array serves as a pool for the method
75   * <code> getMaximalCumulativeDiscreteDistribution. </code>
76   */
77  private static final CumulativeDiscreteDistribution[] m_maximalDistributions;
78
79  // fill both static arrays with the correct distributions
80  static {
81    m_minimalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1];
82    m_maximalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1];
83
84    for (int i = 1; i <= MAX_CLASSES; i++) {
85      double[] dd = new double[i];
86      dd[dd.length - 1] = 1;
87      m_maximalDistributions[i] = new CumulativeDiscreteDistribution(dd);
88      Arrays.fill(dd,1);
89      m_minimalDistributions[i] = new CumulativeDiscreteDistribution(dd);
90    }
91  }
92
93  /**
94   *  Compute a linear interpolation between the two given
95   *  <code> CumulativeDiscreteDistribution. </code>
96   * 
97   *  @param cdf1 the first <code> CumulativeDiscreteDistribution </code>
98   *  @param cdf2 the second <code> CumulativeDiscreteDistribution </code>
99   *  @param s the interpolation parameter
100   *  @return (1 - s) &times; cdf1 + s &times; cdf2
101   *  @throws IllegalArgumentException if the two distributions
102   *  don't have the same size or if the parameter <code> s </code>
103   *  is not in the range [0,1]
104   */
105  public static CumulativeDiscreteDistribution interpolate(
106      CumulativeDiscreteDistribution cdf1,
107      CumulativeDiscreteDistribution cdf2, double s) throws IllegalArgumentException {
108
109    if (cdf1.getNumSymbols() != cdf2.getNumSymbols()) { 
110      throw new IllegalArgumentException
111      ("CumulativeDiscreteDistributions don't have " 
112          + "the same size");
113    }
114
115    if (s < 0 || s > 1) {
116      throw new IllegalArgumentException
117      ("Parameter s exceeds bounds");
118    }
119
120    double[] res = new double[cdf1.getNumSymbols()];
121    for (int i = 0, n = cdf1.getNumSymbols(); i < n; i++) {
122      res[i] = (1 - s) * cdf1.getCumulativeProbability(i) +
123      s * cdf2.getCumulativeProbability(i);
124    }
125    return new CumulativeDiscreteDistribution(res);
126  }
127
128  /**
129   *  Compute a linear interpolation between the two given
130   *  <code> CumulativeDiscreteDistribution. </code>
131   * 
132   *  @param cdf1 the first <code> CumulativeDiscreteDistribution </code>
133   *  @param cdf2 the second <code> CumulativeDiscreteDistribution </code>
134   *  @param s the interpolation parameters, only the relevant number
135   *  of entries is used, so the array may be longer than the common
136   *  length of <code> cdf1 </code> and <code> cdf2 </code>
137   *  @return (1 - s) &times; cdf1 + s &times; cdf2, or more specifically
138   *  a distribution cd such that <code>
139   *  cd.getCumulativeProbability(i) =
140   *  (1-s[i]) &times; cdf1.getCumulativeProbability(i) +
141   *  s[i] &times; cdf2.getCumulativeProbability(i) </code>
142   *  @throws IllegalArgumentException if the two distributions
143   *  don't have the same size or if the array <code> s </code>
144   *  contains parameters not in the range <code> [0,1] </code>
145   */
146  public static CumulativeDiscreteDistribution interpolate(
147      CumulativeDiscreteDistribution cdf1,
148      CumulativeDiscreteDistribution cdf2, double[] s) throws IllegalArgumentException {
149
150    if (cdf1.getNumSymbols() != cdf2.getNumSymbols()) { 
151      throw new IllegalArgumentException
152      ("CumulativeDiscreteDistributions don't have " 
153          + "the same size");
154    }
155
156    if (cdf1.getNumSymbols() > s.length) {
157      throw new IllegalArgumentException
158      ("Array with interpolation parameters is not "
159          + " long enough");
160    }
161
162    double[] res = new double[cdf1.getNumSymbols()];
163    for (int i = 0, n = cdf1.getNumSymbols(); i < n; i++) {
164      if (s[i] < 0 || s[i] > 1) {
165        throw new IllegalArgumentException
166        ("Interpolation parameter exceeds bounds");
167      }
168      res[i] = (1 - s[i]) * cdf1.getCumulativeProbability(i) +
169      s[i] * cdf2.getCumulativeProbability(i);
170    }
171    return new CumulativeDiscreteDistribution(res);
172  }
173
174  /**
175   *  Compute a linear interpolation between the two given
176   *  <code> DiscreteDistribution. </code>
177   * 
178   *  @param ddf1 the first <code> DiscreteDistribution </code>
179   *  @param ddf2 the second <code> DiscreteDistribution </code>
180   *  @param s the interpolation parameter
181   *  @return <code> (1 - s) &times; ddf1 + s &times; ddf2 </code>
182   *  @throws IllegalArgumentException if the two distributions
183   *  don't have the same size or if the parameter <code> s </code>
184   *  is not in the range [0,1]
185   */
186  public static DiscreteDistribution interpolate(
187      DiscreteDistribution ddf1,
188      DiscreteDistribution ddf2, double s) throws IllegalArgumentException {
189
190    if (ddf1.getNumSymbols() != ddf2.getNumSymbols()) { 
191      throw new IllegalArgumentException
192      ("DiscreteDistributions don't have " 
193          + "the same size");
194    }
195
196    if (s < 0 || s > 1) {
197      throw new IllegalArgumentException
198      ("Parameter s exceeds bounds");
199    }
200
201    double[] res = new double[ddf1.getNumSymbols()];
202    for (int i = 0, n = ddf1.getNumSymbols(); i < n; i++) {
203      res[i] = (1 - s) * ddf1.getProbability(i) +
204      s * ddf2.getProbability(i);
205    }
206    return new DiscreteDistribution(res);
207  }
208
209  /**
210   * Create a new <code> CumulativeDiscreteDistribution </code>
211   * that is the minimum of the two given <code>
212   * CumulativeDiscreteDistribution. </code>
213   * Each component of the resulting probability distribution
214   * is the minimum of the two corresponding components. <br/>
215   * Note: despite of its name, the returned cumulative probability
216   * distribution dominates both the arguments of this method.
217   *
218   * @param cdf1 first <code> CumulativeDiscreteDistribution </code>
219   * @param cdf2 second <code> CumulativeDiscreteDistribution </code>
220   * @return the minimum of the two distributions
221   * @throws IllegalArgumentException if the two distributions
222   * dont't have the same length
223   */
224  public static CumulativeDiscreteDistribution takeMin(
225      CumulativeDiscreteDistribution cdf1,
226      CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException {
227   
228    if (cdf1.getNumSymbols() != cdf2.getNumSymbols() )
229      throw new IllegalArgumentException
230      ("Cumulative distributions don't have the same length");
231
232    double[] cdf = new double[cdf1.getNumSymbols()];
233    int n = cdf.length;
234    for (int i = 0; i < n; i++) { 
235      cdf[i] = Math.min(cdf1.getCumulativeProbability(i),
236          cdf2.getCumulativeProbability(i));
237    }
238    return new CumulativeDiscreteDistribution(cdf);
239  }
240
241  /**
242   * Create a new <code> CumulativeDiscreteDistribution </code>
243   * that is the maximum of the two given <code>
244   * CumulativeDiscreteDistribution. </code>
245   * Each component of the resulting probability distribution
246   * is the maximum of the two corresponding components.
247   * Note: despite of its name, the returned cumulative probability
248   * distribution is dominated by both the arguments of this method.
249   *
250   * @param cdf1 first <code> CumulativeDiscreteDistribution </code>
251   * @param cdf2 second <code> CumulativeDiscreteDistribution </code>
252   * @return the maximum of the two distributions
253   * @throws IllegalArgumentException if the two distributions
254   * dont't have the same length
255   */
256  public static CumulativeDiscreteDistribution takeMax(
257      CumulativeDiscreteDistribution cdf1,
258      CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException {
259   
260    if (cdf1.getNumSymbols() != cdf2.getNumSymbols() )
261      throw new IllegalArgumentException
262      ("Cumulative distributions don't have the same length");
263
264    double[] cdf = new double[cdf1.getNumSymbols()];
265    int n = cdf.length;
266    for (int i = 0; i < n; i++) { 
267      cdf[i] = Math.max(cdf1.getCumulativeProbability(i),
268          cdf2.getCumulativeProbability(i));
269    }
270    return new CumulativeDiscreteDistribution(cdf);
271  }
272
273  /**
274   * Converts a <code> DiscreteEstimator </code> to an array of
275   * doubles.
276   *
277   * @param df the <code> DiscreteEstimator </code> to be converted
278   * @return an array of doubles representing the
279   * <code> DiscreteEstimator </code>
280   */
281  public static double[] getDistributionArray(DiscreteEstimator df) {
282    double[] dfa = new double[df.getNumSymbols()];
283    for (int i = 0; i < dfa.length; i++) {
284      dfa[i] = df.getProbability(i);
285    }
286    return dfa;
287  }
288
289  /**
290   * Get the minimal <code> CumulativeDiscreteDistribution </code>
291   * over <code> numClasses </code> elements.  This means that
292   * a probability of one is assigned to the first element.
293   *
294   * @param numClasses the number of elements
295   * @return the minimal <code> CumulativeDiscreteDistribution </code>
296   * over the requested number of elements
297   * @throws IllegalArgumentException if <code> numClasses </code>
298   * is smaller or equal than 0
299   */
300  public static CumulativeDiscreteDistribution getMinimalCumulativeDiscreteDistribution(
301      int numClasses) throws IllegalArgumentException {
302   
303    if (numClasses <= 0) {
304      throw new IllegalArgumentException
305      ("Number of elements must be positive");
306    }
307    if (numClasses <= MAX_CLASSES) {
308      return m_minimalDistributions[numClasses];
309    }
310
311    double[] dd = new double[numClasses];
312    Arrays.fill(dd,1);
313    return new CumulativeDiscreteDistribution(dd);
314  }
315
316  /**
317   * Get the maximal <code> CumulativeDiscreteDistribution </code>
318   * over <code> numClasses </code> elements.  This means that
319   * a probability of one is assigned to the last class.
320   *
321   * @param numClasses the number of elements
322   * @return the maximal <code> CumulativeDiscreteDistribution </code>
323   * over the requested number of elements
324   * @throws IllegalArgumentException if <code> numClasses </code>
325   * is smaller or equal than 0
326   */
327  public static CumulativeDiscreteDistribution getMaximalCumulativeDiscreteDistribution(
328      int numClasses) throws IllegalArgumentException {
329   
330    if (numClasses <= 0) {
331      throw new IllegalArgumentException
332      ("Number of elements must be positive");
333    }
334    if (numClasses <= MAX_CLASSES) {
335      return m_maximalDistributions[numClasses];
336    }
337
338    double[] dd = new double[numClasses];
339    dd[dd.length - 1] = 1;
340    return new CumulativeDiscreteDistribution(dd);
341  }
342 
343  /**
344   * Returns the revision string.
345   *
346   * @return            the revision
347   */
348  public String getRevision() {
349    return RevisionUtils.extract("$Revision: 5922 $");
350  }
351}
Note: See TracBrowser for help on using the repository browser.