1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * DistributionUtils.java |
---|
19 | * Copyright (C) 2004 Stijn Lievens |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.classifiers.misc.monotone; |
---|
24 | |
---|
25 | import weka.core.RevisionHandler; |
---|
26 | import weka.core.RevisionUtils; |
---|
27 | import weka.estimators.DiscreteEstimator; |
---|
28 | |
---|
29 | import java.util.Arrays; |
---|
30 | |
---|
31 | /** |
---|
32 | * Class with some simple methods acting on |
---|
33 | * <code> CumulativeDiscreteDistribution. </code> |
---|
34 | * All of the methods in this class are very easily implemented |
---|
35 | * and the main use of this class is to gather all these methods |
---|
36 | * in a single place. It could be argued that some of the methods |
---|
37 | * should be implemented in the class |
---|
38 | * <code> CumulativeDiscreteDistribution </code> itself. |
---|
39 | * <p> |
---|
40 | * This implementation is part of the master's thesis: "Studie |
---|
41 | * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd |
---|
42 | * rangschikken", Stijn Lievens, Ghent University, 2004. |
---|
43 | * </p> |
---|
44 | * |
---|
45 | * @author Stijn Lievens (stijn.lievens@ugent.be) |
---|
46 | * @version $Revision: 5922 $ |
---|
47 | */ |
---|
48 | public class DistributionUtils |
---|
49 | implements RevisionHandler { |
---|
50 | |
---|
51 | /** |
---|
52 | * Constant indicating the maximal number of classes |
---|
53 | * for which there is a minimal and maximal distribution |
---|
54 | * present in the pool. |
---|
55 | * One of the purposes of this class is to serve as a factory |
---|
56 | * for minimal and maximal cumulative probability distributions. |
---|
57 | * Since instances of <code> CumulativeDiscreteDistribution </code> |
---|
58 | * are immutable, we can create them beforehand and reuse them |
---|
59 | * every time one is needed. |
---|
60 | */ |
---|
61 | private static final int MAX_CLASSES = 20; |
---|
62 | |
---|
63 | /** |
---|
64 | * Array filled with minimal cumulative discrete probability |
---|
65 | * distributions. This means that probability one is given to the |
---|
66 | * first element. This array serves as a pool for the method |
---|
67 | * <code> getMinimalCumulativeDiscreteDistribution. </code> |
---|
68 | */ |
---|
69 | private static final CumulativeDiscreteDistribution[] m_minimalDistributions; |
---|
70 | |
---|
71 | /** |
---|
72 | * Array filled with maximal cumulative discrete probability |
---|
73 | * distributions. This means that probability one is given to the |
---|
74 | * largest element. This array serves as a pool for the method |
---|
75 | * <code> getMaximalCumulativeDiscreteDistribution. </code> |
---|
76 | */ |
---|
77 | private static final CumulativeDiscreteDistribution[] m_maximalDistributions; |
---|
78 | |
---|
79 | // fill both static arrays with the correct distributions |
---|
80 | static { |
---|
81 | m_minimalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1]; |
---|
82 | m_maximalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1]; |
---|
83 | |
---|
84 | for (int i = 1; i <= MAX_CLASSES; i++) { |
---|
85 | double[] dd = new double[i]; |
---|
86 | dd[dd.length - 1] = 1; |
---|
87 | m_maximalDistributions[i] = new CumulativeDiscreteDistribution(dd); |
---|
88 | Arrays.fill(dd,1); |
---|
89 | m_minimalDistributions[i] = new CumulativeDiscreteDistribution(dd); |
---|
90 | } |
---|
91 | } |
---|
92 | |
---|
93 | /** |
---|
94 | * Compute a linear interpolation between the two given |
---|
95 | * <code> CumulativeDiscreteDistribution. </code> |
---|
96 | * |
---|
97 | * @param cdf1 the first <code> CumulativeDiscreteDistribution </code> |
---|
98 | * @param cdf2 the second <code> CumulativeDiscreteDistribution </code> |
---|
99 | * @param s the interpolation parameter |
---|
100 | * @return (1 - s) × cdf1 + s × cdf2 |
---|
101 | * @throws IllegalArgumentException if the two distributions |
---|
102 | * don't have the same size or if the parameter <code> s </code> |
---|
103 | * is not in the range [0,1] |
---|
104 | */ |
---|
105 | public static CumulativeDiscreteDistribution interpolate( |
---|
106 | CumulativeDiscreteDistribution cdf1, |
---|
107 | CumulativeDiscreteDistribution cdf2, double s) throws IllegalArgumentException { |
---|
108 | |
---|
109 | if (cdf1.getNumSymbols() != cdf2.getNumSymbols()) { |
---|
110 | throw new IllegalArgumentException |
---|
111 | ("CumulativeDiscreteDistributions don't have " |
---|
112 | + "the same size"); |
---|
113 | } |
---|
114 | |
---|
115 | if (s < 0 || s > 1) { |
---|
116 | throw new IllegalArgumentException |
---|
117 | ("Parameter s exceeds bounds"); |
---|
118 | } |
---|
119 | |
---|
120 | double[] res = new double[cdf1.getNumSymbols()]; |
---|
121 | for (int i = 0, n = cdf1.getNumSymbols(); i < n; i++) { |
---|
122 | res[i] = (1 - s) * cdf1.getCumulativeProbability(i) + |
---|
123 | s * cdf2.getCumulativeProbability(i); |
---|
124 | } |
---|
125 | return new CumulativeDiscreteDistribution(res); |
---|
126 | } |
---|
127 | |
---|
128 | /** |
---|
129 | * Compute a linear interpolation between the two given |
---|
130 | * <code> CumulativeDiscreteDistribution. </code> |
---|
131 | * |
---|
132 | * @param cdf1 the first <code> CumulativeDiscreteDistribution </code> |
---|
133 | * @param cdf2 the second <code> CumulativeDiscreteDistribution </code> |
---|
134 | * @param s the interpolation parameters, only the relevant number |
---|
135 | * of entries is used, so the array may be longer than the common |
---|
136 | * length of <code> cdf1 </code> and <code> cdf2 </code> |
---|
137 | * @return (1 - s) × cdf1 + s × cdf2, or more specifically |
---|
138 | * a distribution cd such that <code> |
---|
139 | * cd.getCumulativeProbability(i) = |
---|
140 | * (1-s[i]) × cdf1.getCumulativeProbability(i) + |
---|
141 | * s[i] × cdf2.getCumulativeProbability(i) </code> |
---|
142 | * @throws IllegalArgumentException if the two distributions |
---|
143 | * don't have the same size or if the array <code> s </code> |
---|
144 | * contains parameters not in the range <code> [0,1] </code> |
---|
145 | */ |
---|
146 | public static CumulativeDiscreteDistribution interpolate( |
---|
147 | CumulativeDiscreteDistribution cdf1, |
---|
148 | CumulativeDiscreteDistribution cdf2, double[] s) throws IllegalArgumentException { |
---|
149 | |
---|
150 | if (cdf1.getNumSymbols() != cdf2.getNumSymbols()) { |
---|
151 | throw new IllegalArgumentException |
---|
152 | ("CumulativeDiscreteDistributions don't have " |
---|
153 | + "the same size"); |
---|
154 | } |
---|
155 | |
---|
156 | if (cdf1.getNumSymbols() > s.length) { |
---|
157 | throw new IllegalArgumentException |
---|
158 | ("Array with interpolation parameters is not " |
---|
159 | + " long enough"); |
---|
160 | } |
---|
161 | |
---|
162 | double[] res = new double[cdf1.getNumSymbols()]; |
---|
163 | for (int i = 0, n = cdf1.getNumSymbols(); i < n; i++) { |
---|
164 | if (s[i] < 0 || s[i] > 1) { |
---|
165 | throw new IllegalArgumentException |
---|
166 | ("Interpolation parameter exceeds bounds"); |
---|
167 | } |
---|
168 | res[i] = (1 - s[i]) * cdf1.getCumulativeProbability(i) + |
---|
169 | s[i] * cdf2.getCumulativeProbability(i); |
---|
170 | } |
---|
171 | return new CumulativeDiscreteDistribution(res); |
---|
172 | } |
---|
173 | |
---|
174 | /** |
---|
175 | * Compute a linear interpolation between the two given |
---|
176 | * <code> DiscreteDistribution. </code> |
---|
177 | * |
---|
178 | * @param ddf1 the first <code> DiscreteDistribution </code> |
---|
179 | * @param ddf2 the second <code> DiscreteDistribution </code> |
---|
180 | * @param s the interpolation parameter |
---|
181 | * @return <code> (1 - s) × ddf1 + s × ddf2 </code> |
---|
182 | * @throws IllegalArgumentException if the two distributions |
---|
183 | * don't have the same size or if the parameter <code> s </code> |
---|
184 | * is not in the range [0,1] |
---|
185 | */ |
---|
186 | public static DiscreteDistribution interpolate( |
---|
187 | DiscreteDistribution ddf1, |
---|
188 | DiscreteDistribution ddf2, double s) throws IllegalArgumentException { |
---|
189 | |
---|
190 | if (ddf1.getNumSymbols() != ddf2.getNumSymbols()) { |
---|
191 | throw new IllegalArgumentException |
---|
192 | ("DiscreteDistributions don't have " |
---|
193 | + "the same size"); |
---|
194 | } |
---|
195 | |
---|
196 | if (s < 0 || s > 1) { |
---|
197 | throw new IllegalArgumentException |
---|
198 | ("Parameter s exceeds bounds"); |
---|
199 | } |
---|
200 | |
---|
201 | double[] res = new double[ddf1.getNumSymbols()]; |
---|
202 | for (int i = 0, n = ddf1.getNumSymbols(); i < n; i++) { |
---|
203 | res[i] = (1 - s) * ddf1.getProbability(i) + |
---|
204 | s * ddf2.getProbability(i); |
---|
205 | } |
---|
206 | return new DiscreteDistribution(res); |
---|
207 | } |
---|
208 | |
---|
209 | /** |
---|
210 | * Create a new <code> CumulativeDiscreteDistribution </code> |
---|
211 | * that is the minimum of the two given <code> |
---|
212 | * CumulativeDiscreteDistribution. </code> |
---|
213 | * Each component of the resulting probability distribution |
---|
214 | * is the minimum of the two corresponding components. <br/> |
---|
215 | * Note: despite of its name, the returned cumulative probability |
---|
216 | * distribution dominates both the arguments of this method. |
---|
217 | * |
---|
218 | * @param cdf1 first <code> CumulativeDiscreteDistribution </code> |
---|
219 | * @param cdf2 second <code> CumulativeDiscreteDistribution </code> |
---|
220 | * @return the minimum of the two distributions |
---|
221 | * @throws IllegalArgumentException if the two distributions |
---|
222 | * dont't have the same length |
---|
223 | */ |
---|
224 | public static CumulativeDiscreteDistribution takeMin( |
---|
225 | CumulativeDiscreteDistribution cdf1, |
---|
226 | CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException { |
---|
227 | |
---|
228 | if (cdf1.getNumSymbols() != cdf2.getNumSymbols() ) |
---|
229 | throw new IllegalArgumentException |
---|
230 | ("Cumulative distributions don't have the same length"); |
---|
231 | |
---|
232 | double[] cdf = new double[cdf1.getNumSymbols()]; |
---|
233 | int n = cdf.length; |
---|
234 | for (int i = 0; i < n; i++) { |
---|
235 | cdf[i] = Math.min(cdf1.getCumulativeProbability(i), |
---|
236 | cdf2.getCumulativeProbability(i)); |
---|
237 | } |
---|
238 | return new CumulativeDiscreteDistribution(cdf); |
---|
239 | } |
---|
240 | |
---|
241 | /** |
---|
242 | * Create a new <code> CumulativeDiscreteDistribution </code> |
---|
243 | * that is the maximum of the two given <code> |
---|
244 | * CumulativeDiscreteDistribution. </code> |
---|
245 | * Each component of the resulting probability distribution |
---|
246 | * is the maximum of the two corresponding components. |
---|
247 | * Note: despite of its name, the returned cumulative probability |
---|
248 | * distribution is dominated by both the arguments of this method. |
---|
249 | * |
---|
250 | * @param cdf1 first <code> CumulativeDiscreteDistribution </code> |
---|
251 | * @param cdf2 second <code> CumulativeDiscreteDistribution </code> |
---|
252 | * @return the maximum of the two distributions |
---|
253 | * @throws IllegalArgumentException if the two distributions |
---|
254 | * dont't have the same length |
---|
255 | */ |
---|
256 | public static CumulativeDiscreteDistribution takeMax( |
---|
257 | CumulativeDiscreteDistribution cdf1, |
---|
258 | CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException { |
---|
259 | |
---|
260 | if (cdf1.getNumSymbols() != cdf2.getNumSymbols() ) |
---|
261 | throw new IllegalArgumentException |
---|
262 | ("Cumulative distributions don't have the same length"); |
---|
263 | |
---|
264 | double[] cdf = new double[cdf1.getNumSymbols()]; |
---|
265 | int n = cdf.length; |
---|
266 | for (int i = 0; i < n; i++) { |
---|
267 | cdf[i] = Math.max(cdf1.getCumulativeProbability(i), |
---|
268 | cdf2.getCumulativeProbability(i)); |
---|
269 | } |
---|
270 | return new CumulativeDiscreteDistribution(cdf); |
---|
271 | } |
---|
272 | |
---|
273 | /** |
---|
274 | * Converts a <code> DiscreteEstimator </code> to an array of |
---|
275 | * doubles. |
---|
276 | * |
---|
277 | * @param df the <code> DiscreteEstimator </code> to be converted |
---|
278 | * @return an array of doubles representing the |
---|
279 | * <code> DiscreteEstimator </code> |
---|
280 | */ |
---|
281 | public static double[] getDistributionArray(DiscreteEstimator df) { |
---|
282 | double[] dfa = new double[df.getNumSymbols()]; |
---|
283 | for (int i = 0; i < dfa.length; i++) { |
---|
284 | dfa[i] = df.getProbability(i); |
---|
285 | } |
---|
286 | return dfa; |
---|
287 | } |
---|
288 | |
---|
289 | /** |
---|
290 | * Get the minimal <code> CumulativeDiscreteDistribution </code> |
---|
291 | * over <code> numClasses </code> elements. This means that |
---|
292 | * a probability of one is assigned to the first element. |
---|
293 | * |
---|
294 | * @param numClasses the number of elements |
---|
295 | * @return the minimal <code> CumulativeDiscreteDistribution </code> |
---|
296 | * over the requested number of elements |
---|
297 | * @throws IllegalArgumentException if <code> numClasses </code> |
---|
298 | * is smaller or equal than 0 |
---|
299 | */ |
---|
300 | public static CumulativeDiscreteDistribution getMinimalCumulativeDiscreteDistribution( |
---|
301 | int numClasses) throws IllegalArgumentException { |
---|
302 | |
---|
303 | if (numClasses <= 0) { |
---|
304 | throw new IllegalArgumentException |
---|
305 | ("Number of elements must be positive"); |
---|
306 | } |
---|
307 | if (numClasses <= MAX_CLASSES) { |
---|
308 | return m_minimalDistributions[numClasses]; |
---|
309 | } |
---|
310 | |
---|
311 | double[] dd = new double[numClasses]; |
---|
312 | Arrays.fill(dd,1); |
---|
313 | return new CumulativeDiscreteDistribution(dd); |
---|
314 | } |
---|
315 | |
---|
316 | /** |
---|
317 | * Get the maximal <code> CumulativeDiscreteDistribution </code> |
---|
318 | * over <code> numClasses </code> elements. This means that |
---|
319 | * a probability of one is assigned to the last class. |
---|
320 | * |
---|
321 | * @param numClasses the number of elements |
---|
322 | * @return the maximal <code> CumulativeDiscreteDistribution </code> |
---|
323 | * over the requested number of elements |
---|
324 | * @throws IllegalArgumentException if <code> numClasses </code> |
---|
325 | * is smaller or equal than 0 |
---|
326 | */ |
---|
327 | public static CumulativeDiscreteDistribution getMaximalCumulativeDiscreteDistribution( |
---|
328 | int numClasses) throws IllegalArgumentException { |
---|
329 | |
---|
330 | if (numClasses <= 0) { |
---|
331 | throw new IllegalArgumentException |
---|
332 | ("Number of elements must be positive"); |
---|
333 | } |
---|
334 | if (numClasses <= MAX_CLASSES) { |
---|
335 | return m_maximalDistributions[numClasses]; |
---|
336 | } |
---|
337 | |
---|
338 | double[] dd = new double[numClasses]; |
---|
339 | dd[dd.length - 1] = 1; |
---|
340 | return new CumulativeDiscreteDistribution(dd); |
---|
341 | } |
---|
342 | |
---|
343 | /** |
---|
344 | * Returns the revision string. |
---|
345 | * |
---|
346 | * @return the revision |
---|
347 | */ |
---|
348 | public String getRevision() { |
---|
349 | return RevisionUtils.extract("$Revision: 5922 $"); |
---|
350 | } |
---|
351 | } |
---|