1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * ContingencyTables.java |
---|
19 | * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.core; |
---|
24 | |
---|
25 | /** |
---|
26 | * Class implementing some statistical routines for contingency tables. |
---|
27 | * |
---|
28 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) |
---|
29 | * @version $Revision: 5953 $ |
---|
30 | */ |
---|
31 | public class ContingencyTables |
---|
32 | implements RevisionHandler { |
---|
33 | |
---|
34 | /** The natural logarithm of 2 */ |
---|
35 | private static double log2 = Math.log(2); |
---|
36 | |
---|
37 | /** |
---|
38 | * Returns chi-squared probability for a given matrix. |
---|
39 | * |
---|
40 | * @param matrix the contigency table |
---|
41 | * @param yates is Yates' correction to be used? |
---|
42 | * @return the chi-squared probability |
---|
43 | */ |
---|
44 | |
---|
45 | public static double chiSquared(double [][] matrix, boolean yates) { |
---|
46 | |
---|
47 | int df = (matrix.length - 1) * (matrix[0].length - 1); |
---|
48 | |
---|
49 | return Statistics.chiSquaredProbability(chiVal(matrix, yates), df); |
---|
50 | } |
---|
51 | |
---|
52 | /** |
---|
53 | * Computes chi-squared statistic for a contingency table. |
---|
54 | * |
---|
55 | * @param matrix the contigency table |
---|
56 | * @param useYates is Yates' correction to be used? |
---|
57 | * @return the value of the chi-squared statistic |
---|
58 | */ |
---|
59 | public static double chiVal(double [][] matrix, boolean useYates) { |
---|
60 | |
---|
61 | int df, nrows, ncols, row, col; |
---|
62 | double[] rtotal, ctotal; |
---|
63 | double expect = 0, chival = 0, n = 0; |
---|
64 | boolean yates = true; |
---|
65 | |
---|
66 | nrows = matrix.length; |
---|
67 | ncols = matrix[0].length; |
---|
68 | rtotal = new double [nrows]; |
---|
69 | ctotal = new double [ncols]; |
---|
70 | for (row = 0; row < nrows; row++) { |
---|
71 | for (col = 0; col < ncols; col++) { |
---|
72 | rtotal[row] += matrix[row][col]; |
---|
73 | ctotal[col] += matrix[row][col]; |
---|
74 | n += matrix[row][col]; |
---|
75 | } |
---|
76 | } |
---|
77 | df = (nrows - 1)*(ncols - 1); |
---|
78 | if ((df > 1) || (!useYates)) { |
---|
79 | yates = false; |
---|
80 | } else if (df <= 0) { |
---|
81 | return 0; |
---|
82 | } |
---|
83 | chival = 0.0; |
---|
84 | for (row = 0; row < nrows; row++) { |
---|
85 | if (Utils.gr(rtotal[row], 0)) { |
---|
86 | for (col = 0; col < ncols; col++) { |
---|
87 | if (Utils.gr(ctotal[col], 0)) { |
---|
88 | expect = (ctotal[col] * rtotal[row]) / n; |
---|
89 | chival += chiCell (matrix[row][col], expect, yates); |
---|
90 | } |
---|
91 | } |
---|
92 | } |
---|
93 | } |
---|
94 | return chival; |
---|
95 | } |
---|
96 | |
---|
97 | /** |
---|
98 | * Tests if Cochran's criterion is fullfilled for the given |
---|
99 | * contingency table. Rows and columns with all zeros are not considered |
---|
100 | * relevant. |
---|
101 | * |
---|
102 | * @param matrix the contigency table to be tested |
---|
103 | * @return true if contingency table is ok, false if not |
---|
104 | */ |
---|
105 | public static boolean cochransCriterion(double[][] matrix) { |
---|
106 | |
---|
107 | double[] rtotal, ctotal; |
---|
108 | double n = 0, expect, smallfreq = 5; |
---|
109 | int smallcount = 0, nonZeroRows = 0, nonZeroColumns = 0, nrows, ncols, |
---|
110 | row, col; |
---|
111 | |
---|
112 | nrows = matrix.length; |
---|
113 | ncols = matrix[0].length; |
---|
114 | |
---|
115 | rtotal = new double [nrows]; |
---|
116 | ctotal = new double [ncols]; |
---|
117 | for (row = 0; row < nrows; row++) { |
---|
118 | for (col = 0; col < ncols; col++) { |
---|
119 | rtotal[row] += matrix[row][col]; |
---|
120 | ctotal[col] += matrix[row][col]; |
---|
121 | n += matrix[row][col]; |
---|
122 | } |
---|
123 | } |
---|
124 | for (row = 0; row < nrows; row++) { |
---|
125 | if (Utils.gr(rtotal[row], 0)) { |
---|
126 | nonZeroRows++; |
---|
127 | } |
---|
128 | } |
---|
129 | for (col = 0; col < ncols; col++) { |
---|
130 | if (Utils.gr(ctotal[col], 0)) { |
---|
131 | nonZeroColumns++; |
---|
132 | } |
---|
133 | } |
---|
134 | for (row = 0; row < nrows; row++) { |
---|
135 | if (Utils.gr(rtotal[row], 0)) { |
---|
136 | for (col = 0; col < ncols; col++) { |
---|
137 | if (Utils.gr(ctotal[col], 0)) { |
---|
138 | expect = (ctotal[col] * rtotal[row]) / n; |
---|
139 | if (Utils.sm(expect, smallfreq)) { |
---|
140 | if (Utils.sm(expect, 1)) { |
---|
141 | return false; |
---|
142 | } else { |
---|
143 | smallcount++; |
---|
144 | if (smallcount > (nonZeroRows * nonZeroColumns) / smallfreq) { |
---|
145 | return false; |
---|
146 | } |
---|
147 | } |
---|
148 | } |
---|
149 | } |
---|
150 | } |
---|
151 | } |
---|
152 | } |
---|
153 | return true; |
---|
154 | } |
---|
155 | |
---|
156 | /** |
---|
157 | * Computes Cramer's V for a contingency table. |
---|
158 | * |
---|
159 | * @param matrix the contingency table |
---|
160 | * @return Cramer's V |
---|
161 | */ |
---|
162 | public static double CramersV(double [][] matrix) { |
---|
163 | |
---|
164 | int row, col, nrows,ncols, min; |
---|
165 | double n = 0; |
---|
166 | |
---|
167 | nrows = matrix.length; |
---|
168 | ncols = matrix[0].length; |
---|
169 | for (row = 0; row < nrows; row++) { |
---|
170 | for (col = 0; col < ncols; col++) { |
---|
171 | n += matrix[row][col]; |
---|
172 | } |
---|
173 | } |
---|
174 | min = nrows < ncols ? nrows-1 : ncols-1; |
---|
175 | if ((min == 0) || Utils.eq(n, 0)) |
---|
176 | return 0; |
---|
177 | return Math.sqrt(chiVal(matrix, false) / (n * (double)min)); |
---|
178 | } |
---|
179 | |
---|
180 | /** |
---|
181 | * Computes the entropy of the given array. |
---|
182 | * |
---|
183 | * @param array the array |
---|
184 | * @return the entropy |
---|
185 | */ |
---|
186 | public static double entropy(double[] array) { |
---|
187 | |
---|
188 | double returnValue = 0, sum = 0; |
---|
189 | |
---|
190 | for (int i = 0; i < array.length; i++) { |
---|
191 | returnValue -= lnFunc(array[i]); |
---|
192 | sum += array[i]; |
---|
193 | } |
---|
194 | if (Utils.eq(sum, 0)) { |
---|
195 | return 0; |
---|
196 | } else { |
---|
197 | return (returnValue + lnFunc(sum)) / (sum * log2); |
---|
198 | } |
---|
199 | } |
---|
200 | |
---|
201 | /** |
---|
202 | * Computes conditional entropy of the rows given |
---|
203 | * the columns. |
---|
204 | * |
---|
205 | * @param matrix the contingency table |
---|
206 | * @return the conditional entropy of the rows given the columns |
---|
207 | */ |
---|
208 | public static double entropyConditionedOnColumns(double[][] matrix) { |
---|
209 | |
---|
210 | double returnValue = 0, sumForColumn, total = 0; |
---|
211 | |
---|
212 | for (int j = 0; j < matrix[0].length; j++) { |
---|
213 | sumForColumn = 0; |
---|
214 | for (int i = 0; i < matrix.length; i++) { |
---|
215 | returnValue = returnValue + lnFunc(matrix[i][j]); |
---|
216 | sumForColumn += matrix[i][j]; |
---|
217 | } |
---|
218 | returnValue = returnValue - lnFunc(sumForColumn); |
---|
219 | total += sumForColumn; |
---|
220 | } |
---|
221 | if (Utils.eq(total, 0)) { |
---|
222 | return 0; |
---|
223 | } |
---|
224 | return -returnValue / (total * log2); |
---|
225 | } |
---|
226 | |
---|
227 | /** |
---|
228 | * Computes conditional entropy of the columns given |
---|
229 | * the rows. |
---|
230 | * |
---|
231 | * @param matrix the contingency table |
---|
232 | * @return the conditional entropy of the columns given the rows |
---|
233 | */ |
---|
234 | public static double entropyConditionedOnRows(double[][] matrix) { |
---|
235 | |
---|
236 | double returnValue = 0, sumForRow, total = 0; |
---|
237 | |
---|
238 | for (int i = 0; i < matrix.length; i++) { |
---|
239 | sumForRow = 0; |
---|
240 | for (int j = 0; j < matrix[0].length; j++) { |
---|
241 | returnValue = returnValue + lnFunc(matrix[i][j]); |
---|
242 | sumForRow += matrix[i][j]; |
---|
243 | } |
---|
244 | returnValue = returnValue - lnFunc(sumForRow); |
---|
245 | total += sumForRow; |
---|
246 | } |
---|
247 | if (Utils.eq(total, 0)) { |
---|
248 | return 0; |
---|
249 | } |
---|
250 | return -returnValue / (total * log2); |
---|
251 | } |
---|
252 | |
---|
253 | /** |
---|
254 | * Computes conditional entropy of the columns given the rows |
---|
255 | * of the test matrix with respect to the train matrix. Uses a |
---|
256 | * Laplace prior. Does NOT normalize the entropy. |
---|
257 | * |
---|
258 | * @param train the train matrix |
---|
259 | * @param test the test matrix |
---|
260 | * @param numClasses the number of symbols for Laplace |
---|
261 | * @return the entropy |
---|
262 | */ |
---|
263 | public static double entropyConditionedOnRows(double[][] train, |
---|
264 | double[][] test, |
---|
265 | double numClasses) { |
---|
266 | |
---|
267 | double returnValue = 0, trainSumForRow, testSumForRow, testSum = 0; |
---|
268 | |
---|
269 | for (int i = 0; i < test.length; i++) { |
---|
270 | trainSumForRow = 0; |
---|
271 | testSumForRow = 0; |
---|
272 | for (int j = 0; j < test[0].length; j++) { |
---|
273 | returnValue -= test[i][j] * Math.log(train[i][j] + 1); |
---|
274 | trainSumForRow += train[i][j]; |
---|
275 | testSumForRow += test[i][j]; |
---|
276 | } |
---|
277 | testSum = testSumForRow; |
---|
278 | returnValue += testSumForRow * Math.log(trainSumForRow + |
---|
279 | numClasses); |
---|
280 | } |
---|
281 | return returnValue / (testSum * log2); |
---|
282 | } |
---|
283 | |
---|
284 | /** |
---|
285 | * Computes the rows' entropy for the given contingency table. |
---|
286 | * |
---|
287 | * @param matrix the contingency table |
---|
288 | * @return the rows' entropy |
---|
289 | */ |
---|
290 | public static double entropyOverRows(double[][] matrix) { |
---|
291 | |
---|
292 | double returnValue = 0, sumForRow, total = 0; |
---|
293 | |
---|
294 | for (int i = 0; i < matrix.length; i++) { |
---|
295 | sumForRow = 0; |
---|
296 | for (int j = 0; j < matrix[0].length; j++) { |
---|
297 | sumForRow += matrix[i][j]; |
---|
298 | } |
---|
299 | returnValue = returnValue - lnFunc(sumForRow); |
---|
300 | total += sumForRow; |
---|
301 | } |
---|
302 | if (Utils.eq(total, 0)) { |
---|
303 | return 0; |
---|
304 | } |
---|
305 | return (returnValue + lnFunc(total)) / (total * log2); |
---|
306 | } |
---|
307 | |
---|
308 | /** |
---|
309 | * Computes the columns' entropy for the given contingency table. |
---|
310 | * |
---|
311 | * @param matrix the contingency table |
---|
312 | * @return the columns' entropy |
---|
313 | */ |
---|
314 | public static double entropyOverColumns(double[][] matrix){ |
---|
315 | |
---|
316 | double returnValue = 0, sumForColumn, total = 0; |
---|
317 | |
---|
318 | for (int j = 0; j < matrix[0].length; j++){ |
---|
319 | sumForColumn = 0; |
---|
320 | for (int i = 0; i < matrix.length; i++) { |
---|
321 | sumForColumn += matrix[i][j]; |
---|
322 | } |
---|
323 | returnValue = returnValue - lnFunc(sumForColumn); |
---|
324 | total += sumForColumn; |
---|
325 | } |
---|
326 | if (Utils.eq(total, 0)) { |
---|
327 | return 0; |
---|
328 | } |
---|
329 | return (returnValue + lnFunc(total)) / (total * log2); |
---|
330 | } |
---|
331 | |
---|
332 | /** |
---|
333 | * Computes gain ratio for contingency table (split on rows). |
---|
334 | * Returns Double.MAX_VALUE if the split entropy is 0. |
---|
335 | * |
---|
336 | * @param matrix the contingency table |
---|
337 | * @return the gain ratio |
---|
338 | */ |
---|
339 | public static double gainRatio(double[][] matrix){ |
---|
340 | |
---|
341 | double preSplit = 0, postSplit = 0, splitEnt = 0, |
---|
342 | sumForRow, sumForColumn, total = 0, infoGain; |
---|
343 | |
---|
344 | // Compute entropy before split |
---|
345 | for (int i = 0; i < matrix[0].length; i++) { |
---|
346 | sumForColumn = 0; |
---|
347 | for (int j = 0; j < matrix.length; j++) |
---|
348 | sumForColumn += matrix[j][i]; |
---|
349 | preSplit += lnFunc(sumForColumn); |
---|
350 | total += sumForColumn; |
---|
351 | } |
---|
352 | preSplit -= lnFunc(total); |
---|
353 | |
---|
354 | // Compute entropy after split and split entropy |
---|
355 | for (int i = 0; i < matrix.length; i++) { |
---|
356 | sumForRow = 0; |
---|
357 | for (int j = 0; j < matrix[0].length; j++) { |
---|
358 | postSplit += lnFunc(matrix[i][j]); |
---|
359 | sumForRow += matrix[i][j]; |
---|
360 | } |
---|
361 | splitEnt += lnFunc(sumForRow); |
---|
362 | } |
---|
363 | postSplit -= splitEnt; |
---|
364 | splitEnt -= lnFunc(total); |
---|
365 | |
---|
366 | infoGain = preSplit - postSplit; |
---|
367 | if (Utils.eq(splitEnt, 0)) |
---|
368 | return 0; |
---|
369 | return infoGain / splitEnt; |
---|
370 | } |
---|
371 | |
---|
372 | /** |
---|
373 | * Returns negative base 2 logarithm of multiple hypergeometric |
---|
374 | * probability for a contingency table. |
---|
375 | * |
---|
376 | * @param matrix the contingency table |
---|
377 | * @return the log of the hypergeometric probability of the contingency table |
---|
378 | */ |
---|
379 | public static double log2MultipleHypergeometric(double[][] matrix) { |
---|
380 | |
---|
381 | double sum = 0, sumForRow, sumForColumn, total = 0; |
---|
382 | |
---|
383 | for (int i = 0; i < matrix.length; i++) { |
---|
384 | sumForRow = 0; |
---|
385 | for (int j = 0; j < matrix[i].length; j++) { |
---|
386 | sumForRow += matrix[i][j]; |
---|
387 | } |
---|
388 | sum += SpecialFunctions.lnFactorial(sumForRow); |
---|
389 | total += sumForRow; |
---|
390 | } |
---|
391 | for (int j = 0; j < matrix[0].length; j++) { |
---|
392 | sumForColumn = 0; |
---|
393 | for (int i = 0; i < matrix.length; i++) { |
---|
394 | sumForColumn += matrix [i][j]; |
---|
395 | } |
---|
396 | sum += SpecialFunctions.lnFactorial(sumForColumn); |
---|
397 | } |
---|
398 | for (int i = 0; i < matrix.length; i++) { |
---|
399 | for (int j = 0; j < matrix[i].length; j++) { |
---|
400 | sum -= SpecialFunctions.lnFactorial(matrix[i][j]); |
---|
401 | } |
---|
402 | } |
---|
403 | sum -= SpecialFunctions.lnFactorial(total); |
---|
404 | return -sum / log2; |
---|
405 | } |
---|
406 | |
---|
407 | /** |
---|
408 | * Reduces a matrix by deleting all zero rows and columns. |
---|
409 | * |
---|
410 | * @param matrix the matrix to be reduced |
---|
411 | * @return the matrix with all zero rows and columns deleted |
---|
412 | */ |
---|
413 | public static double[][] reduceMatrix(double[][] matrix) { |
---|
414 | |
---|
415 | int row, col, currCol, currRow, nrows, ncols, |
---|
416 | nonZeroRows = 0, nonZeroColumns = 0; |
---|
417 | double[] rtotal, ctotal; |
---|
418 | double[][] newMatrix; |
---|
419 | |
---|
420 | nrows = matrix.length; |
---|
421 | ncols = matrix[0].length; |
---|
422 | rtotal = new double [nrows]; |
---|
423 | ctotal = new double [ncols]; |
---|
424 | for (row = 0; row < nrows; row++) { |
---|
425 | for (col = 0; col < ncols; col++) { |
---|
426 | rtotal[row] += matrix[row][col]; |
---|
427 | ctotal[col] += matrix[row][col]; |
---|
428 | } |
---|
429 | } |
---|
430 | for (row = 0; row < nrows; row++) { |
---|
431 | if (Utils.gr(rtotal[row],0)) { |
---|
432 | nonZeroRows++; |
---|
433 | } |
---|
434 | } |
---|
435 | for (col = 0; col < ncols; col++) { |
---|
436 | if (Utils.gr(ctotal[col],0)) { |
---|
437 | nonZeroColumns++; |
---|
438 | } |
---|
439 | } |
---|
440 | newMatrix = new double[nonZeroRows][nonZeroColumns]; |
---|
441 | currRow = 0; |
---|
442 | for (row = 0; row < nrows; row++) { |
---|
443 | if (Utils.gr(rtotal[row],0)) { |
---|
444 | currCol = 0; |
---|
445 | for (col = 0; col < ncols; col++) { |
---|
446 | if (Utils.gr(ctotal[col],0)) { |
---|
447 | newMatrix[currRow][currCol] = matrix[row][col]; |
---|
448 | currCol++; |
---|
449 | } |
---|
450 | } |
---|
451 | currRow++; |
---|
452 | } |
---|
453 | } |
---|
454 | return newMatrix; |
---|
455 | } |
---|
456 | |
---|
457 | /** |
---|
458 | * Calculates the symmetrical uncertainty for base 2. |
---|
459 | * |
---|
460 | * @param matrix the contingency table |
---|
461 | * @return the calculated symmetrical uncertainty |
---|
462 | * |
---|
463 | */ |
---|
464 | public static double symmetricalUncertainty(double matrix[][]) { |
---|
465 | |
---|
466 | double sumForColumn, sumForRow, total = 0, columnEntropy = 0, |
---|
467 | rowEntropy = 0, entropyConditionedOnRows = 0, infoGain = 0; |
---|
468 | |
---|
469 | // Compute entropy for columns |
---|
470 | for (int i = 0; i < matrix[0].length; i++) { |
---|
471 | sumForColumn = 0; |
---|
472 | for (int j = 0; j < matrix.length; j++) { |
---|
473 | sumForColumn += matrix[j][i]; |
---|
474 | } |
---|
475 | columnEntropy += lnFunc(sumForColumn); |
---|
476 | total += sumForColumn; |
---|
477 | } |
---|
478 | columnEntropy -= lnFunc(total); |
---|
479 | |
---|
480 | // Compute entropy for rows and conditional entropy |
---|
481 | for (int i = 0; i < matrix.length; i++) { |
---|
482 | sumForRow = 0; |
---|
483 | for (int j = 0; j < matrix[0].length; j++) { |
---|
484 | sumForRow += matrix[i][j]; |
---|
485 | entropyConditionedOnRows += lnFunc(matrix[i][j]); |
---|
486 | } |
---|
487 | rowEntropy += lnFunc(sumForRow); |
---|
488 | } |
---|
489 | entropyConditionedOnRows -= rowEntropy; |
---|
490 | rowEntropy -= lnFunc(total); |
---|
491 | infoGain = columnEntropy - entropyConditionedOnRows; |
---|
492 | if (Utils.eq(columnEntropy, 0) || Utils.eq(rowEntropy, 0)) |
---|
493 | return 0; |
---|
494 | return 2.0 * (infoGain / (columnEntropy + rowEntropy)); |
---|
495 | } |
---|
496 | |
---|
497 | /** |
---|
498 | * Computes Goodman and Kruskal's tau-value for a contingency table. |
---|
499 | * |
---|
500 | * @param matrix the contingency table |
---|
501 | * @return Goodman and Kruskal's tau-value |
---|
502 | */ |
---|
503 | public static double tauVal(double[][] matrix) { |
---|
504 | |
---|
505 | int nrows, ncols, row, col; |
---|
506 | double [] ctotal; |
---|
507 | double maxcol = 0, max, maxtotal = 0, n = 0; |
---|
508 | |
---|
509 | nrows = matrix.length; |
---|
510 | ncols = matrix[0].length; |
---|
511 | ctotal = new double [ncols]; |
---|
512 | for (row = 0; row < nrows; row++) { |
---|
513 | max = 0; |
---|
514 | for (col = 0; col < ncols; col++) { |
---|
515 | if (Utils.gr(matrix[row][col], max)) |
---|
516 | max = matrix[row][col]; |
---|
517 | ctotal[col] += matrix[row][col]; |
---|
518 | n += matrix[row][col]; |
---|
519 | } |
---|
520 | maxtotal += max; |
---|
521 | } |
---|
522 | if (Utils.eq(n, 0)) { |
---|
523 | return 0; |
---|
524 | } |
---|
525 | maxcol = ctotal[Utils.maxIndex(ctotal)]; |
---|
526 | return (maxtotal - maxcol)/(n - maxcol); |
---|
527 | } |
---|
528 | |
---|
529 | /** |
---|
530 | * Help method for computing entropy. |
---|
531 | */ |
---|
532 | private static double lnFunc(double num){ |
---|
533 | |
---|
534 | // Constant hard coded for efficiency reasons |
---|
535 | if (num < 1e-6) { |
---|
536 | return 0; |
---|
537 | } else { |
---|
538 | return num * Math.log(num); |
---|
539 | } |
---|
540 | } |
---|
541 | |
---|
542 | /** |
---|
543 | * Computes chi-value for one cell in a contingency table. |
---|
544 | * |
---|
545 | * @param freq the observed frequency in the cell |
---|
546 | * @param expected the expected frequency in the cell |
---|
547 | * @return the chi-value for that cell; 0 if the expected value is |
---|
548 | * too close to zero |
---|
549 | */ |
---|
550 | private static double chiCell(double freq, double expected, |
---|
551 | boolean yates){ |
---|
552 | |
---|
553 | // Cell in empty row and column? |
---|
554 | if (Utils.smOrEq(expected, 0)) { |
---|
555 | return 0; |
---|
556 | } |
---|
557 | |
---|
558 | // Compute difference between observed and expected value |
---|
559 | double diff = Math.abs(freq - expected); |
---|
560 | if (yates) { |
---|
561 | |
---|
562 | // Apply Yates' correction if wanted |
---|
563 | diff -= 0.5; |
---|
564 | |
---|
565 | // The difference should never be negative |
---|
566 | if (diff < 0) { |
---|
567 | diff = 0; |
---|
568 | } |
---|
569 | } |
---|
570 | |
---|
571 | // Return chi-value for the cell |
---|
572 | return (diff * diff / expected); |
---|
573 | } |
---|
574 | |
---|
575 | /** |
---|
576 | * Returns the revision string. |
---|
577 | * |
---|
578 | * @return the revision |
---|
579 | */ |
---|
580 | public String getRevision() { |
---|
581 | return RevisionUtils.extract("$Revision: 5953 $"); |
---|
582 | } |
---|
583 | |
---|
584 | /** |
---|
585 | * Main method for testing this class. |
---|
586 | */ |
---|
587 | public static void main(String[] ops) { |
---|
588 | |
---|
589 | double[] firstRow = {10, 5, 20}; |
---|
590 | double[] secondRow = {2, 10, 6}; |
---|
591 | double[] thirdRow = {5, 10, 10}; |
---|
592 | double[][] matrix = new double[3][0]; |
---|
593 | |
---|
594 | matrix[0] = firstRow; matrix[1] = secondRow; matrix[2] = thirdRow; |
---|
595 | for (int i = 0; i < matrix.length; i++) { |
---|
596 | for (int j = 0; j < matrix[i].length; j++) { |
---|
597 | System.out.print(matrix[i][j] + " "); |
---|
598 | } |
---|
599 | System.out.println(); |
---|
600 | } |
---|
601 | System.out.println("Chi-squared probability: " + |
---|
602 | ContingencyTables.chiSquared(matrix, false)); |
---|
603 | System.out.println("Chi-squared value: " + |
---|
604 | ContingencyTables.chiVal(matrix, false)); |
---|
605 | System.out.println("Cochran's criterion fullfilled: " + |
---|
606 | ContingencyTables.cochransCriterion(matrix)); |
---|
607 | System.out.println("Cramer's V: " + |
---|
608 | ContingencyTables.CramersV(matrix)); |
---|
609 | System.out.println("Entropy of first row: " + |
---|
610 | ContingencyTables.entropy(firstRow)); |
---|
611 | System.out.println("Entropy conditioned on columns: " + |
---|
612 | ContingencyTables.entropyConditionedOnColumns(matrix)); |
---|
613 | System.out.println("Entropy conditioned on rows: " + |
---|
614 | ContingencyTables.entropyConditionedOnRows(matrix)); |
---|
615 | System.out.println("Entropy conditioned on rows (with Laplace): " + |
---|
616 | ContingencyTables.entropyConditionedOnRows(matrix, matrix, 3)); |
---|
617 | System.out.println("Entropy of rows: " + |
---|
618 | ContingencyTables.entropyOverRows(matrix)); |
---|
619 | System.out.println("Entropy of columns: " + |
---|
620 | ContingencyTables.entropyOverColumns(matrix)); |
---|
621 | System.out.println("Gain ratio: " + |
---|
622 | ContingencyTables.gainRatio(matrix)); |
---|
623 | System.out.println("Negative log2 of multiple hypergeometric probability: " + |
---|
624 | ContingencyTables.log2MultipleHypergeometric(matrix)); |
---|
625 | System.out.println("Symmetrical uncertainty: " + |
---|
626 | ContingencyTables.symmetricalUncertainty(matrix)); |
---|
627 | System.out.println("Tau value: " + |
---|
628 | ContingencyTables.tauVal(matrix)); |
---|
629 | double[][] newMatrix = new double[3][3]; |
---|
630 | newMatrix[0][0] = 1; newMatrix[0][1] = 0; newMatrix[0][2] = 1; |
---|
631 | newMatrix[1][0] = 0; newMatrix[1][1] = 0; newMatrix[1][2] = 0; |
---|
632 | newMatrix[2][0] = 1; newMatrix[2][1] = 0; newMatrix[2][2] = 1; |
---|
633 | System.out.println("Matrix with empty row and column: "); |
---|
634 | for (int i = 0; i < newMatrix.length; i++) { |
---|
635 | for (int j = 0; j < newMatrix[i].length; j++) { |
---|
636 | System.out.print(newMatrix[i][j] + " "); |
---|
637 | } |
---|
638 | System.out.println(); |
---|
639 | } |
---|
640 | System.out.println("Reduced matrix: "); |
---|
641 | newMatrix = ContingencyTables.reduceMatrix(newMatrix); |
---|
642 | for (int i = 0; i < newMatrix.length; i++) { |
---|
643 | for (int j = 0; j < newMatrix[i].length; j++) { |
---|
644 | System.out.print(newMatrix[i][j] + " "); |
---|
645 | } |
---|
646 | System.out.println(); |
---|
647 | } |
---|
648 | } |
---|
649 | } |
---|
650 | |
---|
651 | |
---|
652 | |
---|
653 | |
---|
654 | |
---|
655 | |
---|
656 | |
---|
657 | |
---|