1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /** |
---|
18 | * KStarNumericAttribute.java |
---|
19 | * Copyright (C) 1995 Univeristy of Waikato |
---|
20 | * Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz). |
---|
21 | * |
---|
22 | */ |
---|
23 | |
---|
24 | package weka.classifiers.lazy.kstar; |
---|
25 | |
---|
26 | import weka.core.Attribute; |
---|
27 | import weka.core.Instance; |
---|
28 | import weka.core.Instances; |
---|
29 | import weka.core.RevisionHandler; |
---|
30 | import weka.core.RevisionUtils; |
---|
31 | |
---|
32 | /** |
---|
33 | * A custom class which provides the environment for computing the |
---|
34 | * transformation probability of a specified test instance numeric |
---|
35 | * attribute to a specified train instance numeric attribute. |
---|
36 | * |
---|
37 | * @author Len Trigg (len@reeltwo.com) |
---|
38 | * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz) |
---|
39 | * @version $Revision 1.0 $ |
---|
40 | */ |
---|
41 | public class KStarNumericAttribute |
---|
42 | implements KStarConstants, RevisionHandler { |
---|
43 | |
---|
44 | /** The training instances used for classification. */ |
---|
45 | protected Instances m_TrainSet; |
---|
46 | |
---|
47 | /** The test instance */ |
---|
48 | protected Instance m_Test; |
---|
49 | |
---|
50 | /** The train instance */ |
---|
51 | protected Instance m_Train; |
---|
52 | |
---|
53 | /** The index of the attribute in the test and train instances */ |
---|
54 | protected int m_AttrIndex; |
---|
55 | |
---|
56 | /** The scale parameter */ |
---|
57 | protected double m_Scale = 1.0; |
---|
58 | |
---|
59 | /** Probability of test attribute transforming into train attribute |
---|
60 | with missing value */ |
---|
61 | protected double m_MissingProb = 1.0; |
---|
62 | |
---|
63 | /** Average probability of test attribute transforming into train |
---|
64 | attribute */ |
---|
65 | protected double m_AverageProb = 1.0; |
---|
66 | |
---|
67 | /** Smallest probability of test attribute transforming into train |
---|
68 | attribute */ |
---|
69 | protected double m_SmallestProb = 1.0; |
---|
70 | |
---|
71 | /** The set of disctances from the test attribute to the set of train |
---|
72 | attributes */ |
---|
73 | protected double [] m_Distances; |
---|
74 | |
---|
75 | /** Set of colomns: each colomn representing a randomised version of |
---|
76 | the train dataset class colomn */ |
---|
77 | protected int [][] m_RandClassCols; |
---|
78 | |
---|
79 | /** The number of train instances with no missing attribute values */ |
---|
80 | protected int m_ActualCount = 0; |
---|
81 | |
---|
82 | /** A cache for storing attribute values and their corresponding scale |
---|
83 | parameters */ |
---|
84 | protected KStarCache m_Cache; |
---|
85 | |
---|
86 | /** The number of instances in the dataset */ |
---|
87 | protected int m_NumInstances; |
---|
88 | |
---|
89 | /** The number of class values */ |
---|
90 | protected int m_NumClasses; |
---|
91 | |
---|
92 | /** The number of attributes */ |
---|
93 | protected int m_NumAttributes; |
---|
94 | |
---|
95 | /** The class attribute type */ |
---|
96 | protected int m_ClassType; |
---|
97 | |
---|
98 | /** missing value treatment */ |
---|
99 | protected int m_MissingMode = M_AVERAGE; |
---|
100 | |
---|
101 | /** 0 = use specified blend, 1 = entropic blend setting */ |
---|
102 | protected int m_BlendMethod = B_SPHERE ; |
---|
103 | |
---|
104 | /** default sphere of influence blend setting */ |
---|
105 | protected int m_BlendFactor = 20; |
---|
106 | |
---|
107 | /** |
---|
108 | * Constructor |
---|
109 | */ |
---|
110 | public KStarNumericAttribute(Instance test, Instance train, int attrIndex, |
---|
111 | Instances trainSet, |
---|
112 | int [][] randClassCols, |
---|
113 | KStarCache cache) |
---|
114 | { |
---|
115 | m_Test = test; |
---|
116 | m_Train = train; |
---|
117 | m_AttrIndex = attrIndex; |
---|
118 | m_TrainSet = trainSet; |
---|
119 | m_RandClassCols = randClassCols; |
---|
120 | m_Cache = cache; |
---|
121 | init(); |
---|
122 | } |
---|
123 | |
---|
124 | /** |
---|
125 | * Initializes the m_Attributes of the class. |
---|
126 | */ |
---|
127 | private void init() { |
---|
128 | try { |
---|
129 | m_NumInstances = m_TrainSet.numInstances(); |
---|
130 | m_NumClasses = m_TrainSet.numClasses(); |
---|
131 | m_NumAttributes = m_TrainSet.numAttributes(); |
---|
132 | m_ClassType = m_TrainSet.classAttribute().type(); |
---|
133 | } catch(Exception e) { |
---|
134 | e.printStackTrace(); |
---|
135 | } |
---|
136 | } |
---|
137 | |
---|
138 | /** |
---|
139 | * Calculates the transformation probability of the attribute indexed |
---|
140 | * "m_AttrIndex" in test instance "m_Test" to the same attribute in |
---|
141 | * the train instance "m_Train". |
---|
142 | * |
---|
143 | * @return the probability value |
---|
144 | */ |
---|
145 | public double transProb() { |
---|
146 | String debug = "(KStarNumericAttribute.transProb) "; |
---|
147 | double transProb, distance, scale; |
---|
148 | // check if the attribute value has been encountred before |
---|
149 | // in which case it should be in the numeric cache |
---|
150 | if ( m_Cache.containsKey(m_Test.value(m_AttrIndex))) { |
---|
151 | KStarCache.TableEntry te = |
---|
152 | m_Cache.getCacheValues( m_Test.value(m_AttrIndex) ); |
---|
153 | m_Scale = te.value; |
---|
154 | m_MissingProb = te.pmiss; |
---|
155 | } |
---|
156 | else { |
---|
157 | if (m_BlendMethod == B_ENTROPY) { |
---|
158 | m_Scale = scaleFactorUsingEntropy(); |
---|
159 | } |
---|
160 | else { // default is B_SPHERE |
---|
161 | m_Scale = scaleFactorUsingBlend(); |
---|
162 | } |
---|
163 | m_Cache.store( m_Test.value(m_AttrIndex), m_Scale, m_MissingProb ); |
---|
164 | } |
---|
165 | // now what??? |
---|
166 | if (m_Train.isMissing(m_AttrIndex)) { |
---|
167 | transProb = m_MissingProb; |
---|
168 | } |
---|
169 | else { |
---|
170 | distance = |
---|
171 | Math.abs( m_Test.value(m_AttrIndex) - m_Train.value(m_AttrIndex) ); |
---|
172 | transProb = PStar( distance, m_Scale ); |
---|
173 | } |
---|
174 | return transProb; |
---|
175 | } |
---|
176 | |
---|
177 | /** |
---|
178 | * Calculates the scale factor for the attribute indexed |
---|
179 | * "m_AttrIndex" in test instance "m_Test" using a global |
---|
180 | * blending factor (default value is 20%). |
---|
181 | * |
---|
182 | * @return the scale factor value |
---|
183 | */ |
---|
184 | private double scaleFactorUsingBlend() { |
---|
185 | String debug = "(KStarNumericAttribute.scaleFactorUsingBlend)"; |
---|
186 | int i, j, lowestcount = 0, count = 0; |
---|
187 | double lowest = -1.0, nextlowest = -1.0; |
---|
188 | double root, broot, up, bot; |
---|
189 | double aimfor, min_val = 9e300, scale = 1.0; |
---|
190 | double avgprob = 0.0, minprob = 0.0, min_pos = 0.0; |
---|
191 | |
---|
192 | KStarWrapper botvals = new KStarWrapper(); |
---|
193 | KStarWrapper upvals = new KStarWrapper(); |
---|
194 | KStarWrapper vals = new KStarWrapper(); |
---|
195 | |
---|
196 | m_Distances = new double [m_NumInstances]; |
---|
197 | |
---|
198 | for (j=0; j<m_NumInstances; j++) { |
---|
199 | if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) { |
---|
200 | // mark the train instance with a missing value by setting |
---|
201 | // the distance to -1.0 |
---|
202 | m_Distances[j] = -1.0; |
---|
203 | } |
---|
204 | else { |
---|
205 | m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) - |
---|
206 | m_Test.value(m_AttrIndex)); |
---|
207 | if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) { |
---|
208 | if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) { |
---|
209 | nextlowest = lowest; |
---|
210 | lowest = m_Distances[j]; |
---|
211 | lowestcount = 1; |
---|
212 | } |
---|
213 | else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) { |
---|
214 | // record the number training instances (number n0) at |
---|
215 | // the smallest distance from test instance |
---|
216 | lowestcount++; |
---|
217 | } |
---|
218 | else { |
---|
219 | nextlowest = m_Distances[j]; |
---|
220 | } |
---|
221 | } |
---|
222 | // records the actual number of instances with no missing value |
---|
223 | m_ActualCount++; |
---|
224 | } |
---|
225 | } |
---|
226 | |
---|
227 | if (nextlowest == -1 || lowest == -1) { // Data values are all the same |
---|
228 | scale = 1.0; |
---|
229 | m_SmallestProb = m_AverageProb = 1.0; |
---|
230 | return scale; |
---|
231 | } |
---|
232 | else { |
---|
233 | // starting point for root |
---|
234 | root = 1.0 / (nextlowest - lowest); |
---|
235 | i = 0; |
---|
236 | // given the expression: n0 <= E(scale) <= N |
---|
237 | // E(scale) = (N - n0) * b + n0 with blending factor: 0 <= b <= 1 |
---|
238 | // aimfor = (N - n0) * b + n0 |
---|
239 | aimfor = (m_ActualCount - lowestcount) * |
---|
240 | (double)m_BlendFactor / 100.0 + lowestcount; |
---|
241 | if (m_BlendFactor == 0) { |
---|
242 | aimfor += 1.0; |
---|
243 | } |
---|
244 | // root is bracketed in interval [bot,up] |
---|
245 | bot = 0.0 + ROOT_FINDER_ACCURACY / 2.0; |
---|
246 | up = root * 16; // This is bodgy |
---|
247 | // E(bot) |
---|
248 | calculateSphereSize(bot, botvals); |
---|
249 | botvals.sphere -= aimfor; |
---|
250 | // E(up) |
---|
251 | calculateSphereSize(up, upvals); |
---|
252 | upvals.sphere -= aimfor; |
---|
253 | |
---|
254 | if (botvals.sphere < 0) { // Couldn't include that many |
---|
255 | // instances - going for max possible |
---|
256 | min_pos = bot; |
---|
257 | avgprob = botvals.avgProb; |
---|
258 | minprob = botvals.minProb; |
---|
259 | } |
---|
260 | else if (upvals.sphere > 0) { // Couldn't include that few, |
---|
261 | // going for min possible |
---|
262 | min_pos = up; |
---|
263 | avgprob = upvals.avgProb; |
---|
264 | minprob = upvals.minProb; |
---|
265 | } |
---|
266 | else { |
---|
267 | // Root finding Algorithm starts here ! |
---|
268 | for (;;) { |
---|
269 | calculateSphereSize(root, vals); |
---|
270 | vals.sphere -= aimfor; |
---|
271 | if ( Math.abs(vals.sphere) < min_val ) { |
---|
272 | min_val = Math.abs(vals.sphere); |
---|
273 | min_pos = root; |
---|
274 | avgprob = vals.avgProb; |
---|
275 | minprob = vals.minProb; |
---|
276 | } |
---|
277 | if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ) { |
---|
278 | break; // converged to a solution, done! |
---|
279 | } |
---|
280 | if (vals.sphere > 0.0) { |
---|
281 | broot = (root + up) / 2.0; |
---|
282 | bot = root; |
---|
283 | root = broot; |
---|
284 | } |
---|
285 | else { |
---|
286 | broot = (root + bot) / 2.0; |
---|
287 | up = root; |
---|
288 | root = broot; |
---|
289 | } |
---|
290 | i++; |
---|
291 | if (i > ROOT_FINDER_MAX_ITER) { |
---|
292 | // System.err.println("Warning: "+debug+" |
---|
293 | // ROOT_FINDER_MAX_ITER exceeded"); |
---|
294 | root = min_pos; |
---|
295 | break; |
---|
296 | } |
---|
297 | } |
---|
298 | } |
---|
299 | |
---|
300 | m_SmallestProb = minprob; |
---|
301 | m_AverageProb = avgprob; |
---|
302 | // Set the probability of transforming to a missing value |
---|
303 | switch ( m_MissingMode ) |
---|
304 | { |
---|
305 | case M_DELETE: |
---|
306 | m_MissingProb = 0.0; |
---|
307 | break; |
---|
308 | case M_NORMAL: |
---|
309 | m_MissingProb = 1.0; |
---|
310 | break; |
---|
311 | case M_MAXDIFF: |
---|
312 | m_MissingProb = m_SmallestProb; |
---|
313 | break; |
---|
314 | case M_AVERAGE: |
---|
315 | m_MissingProb = m_AverageProb; |
---|
316 | break; |
---|
317 | } |
---|
318 | // set the scale factor value |
---|
319 | scale = min_pos; |
---|
320 | return scale; |
---|
321 | } |
---|
322 | } |
---|
323 | |
---|
324 | /** |
---|
325 | * Calculates the size of the "sphere of influence" defined as: |
---|
326 | * sphere = sum(P)^2/sum(P^2) where |
---|
327 | * P(i) = root*exp(-2*i*root). |
---|
328 | * Since there are n different training instances we multiply P(i) by 1/n. |
---|
329 | */ |
---|
330 | private void calculateSphereSize(double scale, KStarWrapper params) { |
---|
331 | String debug = "(KStarNumericAttribute.calculateSphereSize)"; |
---|
332 | int i; |
---|
333 | double sphereSize, minprob = 1.0; |
---|
334 | double pstar; // P*(b|a) |
---|
335 | double pstarSum = 0.0; // sum(P*) |
---|
336 | double pstarSquareSum = 0.0; // sum(P*^2) |
---|
337 | double inc; |
---|
338 | for (i = 0; i < m_NumInstances; i++) { |
---|
339 | if (m_Distances[i] < 0) { |
---|
340 | // instance with missing value |
---|
341 | continue; |
---|
342 | } |
---|
343 | else { |
---|
344 | pstar = PStar( m_Distances[i], scale ); |
---|
345 | if (minprob > pstar) { |
---|
346 | minprob = pstar; |
---|
347 | } |
---|
348 | inc = pstar / m_ActualCount; |
---|
349 | pstarSum += inc; |
---|
350 | pstarSquareSum += inc * inc; |
---|
351 | } |
---|
352 | } |
---|
353 | sphereSize = (pstarSquareSum == 0 ? 0 |
---|
354 | : pstarSum * pstarSum / pstarSquareSum); |
---|
355 | // return the values |
---|
356 | params.sphere = sphereSize; |
---|
357 | params.avgProb = pstarSum; |
---|
358 | params.minProb = minprob; |
---|
359 | } |
---|
360 | |
---|
361 | /** |
---|
362 | * Calculates the scale factor using entropy. |
---|
363 | * |
---|
364 | * @return the scale factor value |
---|
365 | */ |
---|
366 | private double scaleFactorUsingEntropy() { |
---|
367 | String debug = "(KStarNumericAttribute.scaleFactorUsingEntropy)"; |
---|
368 | if ( m_ClassType != Attribute.NOMINAL ) { |
---|
369 | System.err.println("Error: "+debug+" attribute class must be nominal!"); |
---|
370 | System.exit(1); |
---|
371 | } |
---|
372 | int i,j, lowestcount = 0, count, itcount; |
---|
373 | double lowest = -1.0, nextlowest = -1.0; |
---|
374 | double root, up, bot, stepsize, delta; |
---|
375 | double actentropy = 0.0, randentropy = 0.0, actscale, randscale; |
---|
376 | double minrand = 0.0, minact = 0.0, maxrand = 0.0, maxact = 0.0; |
---|
377 | double bestdiff, bestroot, currentdiff, lastdiff; |
---|
378 | double bestpsum, bestminprob, scale = 1.0; |
---|
379 | |
---|
380 | KStarWrapper botvals = new KStarWrapper(); |
---|
381 | KStarWrapper upvals = new KStarWrapper(); |
---|
382 | KStarWrapper vals = new KStarWrapper(); |
---|
383 | |
---|
384 | m_Distances = new double [m_NumInstances]; |
---|
385 | |
---|
386 | for (j=0; j<m_NumInstances; j++) { |
---|
387 | if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) { |
---|
388 | // mark the train instance with a missing value by setting |
---|
389 | // the distance to -1.0 |
---|
390 | m_Distances[j] = -1.0; |
---|
391 | } |
---|
392 | else { |
---|
393 | m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) - |
---|
394 | m_Test.value(m_AttrIndex)); |
---|
395 | |
---|
396 | if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) { |
---|
397 | if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) { |
---|
398 | nextlowest = lowest; |
---|
399 | lowest = m_Distances[j]; |
---|
400 | lowestcount = 1; |
---|
401 | } |
---|
402 | else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) { |
---|
403 | // record the number training instances (number n0) at |
---|
404 | // the smallest distance from test instance |
---|
405 | lowestcount++; |
---|
406 | } |
---|
407 | else { |
---|
408 | nextlowest = m_Distances[j]; |
---|
409 | } |
---|
410 | } |
---|
411 | // records the actual number of instances with no missing value |
---|
412 | m_ActualCount++; |
---|
413 | } |
---|
414 | } // for |
---|
415 | |
---|
416 | if (nextlowest == -1 || lowest == -1) { // Data values are all the same |
---|
417 | scale = 1.0; |
---|
418 | m_SmallestProb = m_AverageProb = 1.0; |
---|
419 | return scale; |
---|
420 | } |
---|
421 | else { |
---|
422 | // starting point for root |
---|
423 | root = 1.0 / (nextlowest - lowest); |
---|
424 | // root is bracketed in interval [bot,up] |
---|
425 | bot = 0.0 + ROOT_FINDER_ACCURACY / 2; |
---|
426 | up = root * 8; // This is bodgy |
---|
427 | // Find (approx) entropy ranges |
---|
428 | calculateEntropy(up, upvals); |
---|
429 | calculateEntropy(bot, botvals); |
---|
430 | actscale = botvals.actEntropy - upvals.actEntropy; |
---|
431 | randscale = botvals.randEntropy - upvals.randEntropy; |
---|
432 | // Optimise the scale factor |
---|
433 | bestroot = root = bot; |
---|
434 | bestdiff = currentdiff = FLOOR1; |
---|
435 | bestpsum = botvals.avgProb; |
---|
436 | bestminprob = botvals.minProb; |
---|
437 | stepsize = (up - bot) / 20.0; |
---|
438 | itcount = 0; |
---|
439 | // Root finding algorithm starts here! |
---|
440 | while (true) |
---|
441 | { |
---|
442 | itcount++; |
---|
443 | lastdiff = currentdiff; |
---|
444 | root += Math.log(root + 1.0) * stepsize; |
---|
445 | if (root <= bot) { |
---|
446 | root = bot; |
---|
447 | currentdiff = 0.0; |
---|
448 | delta = -1.0; |
---|
449 | } |
---|
450 | else if (root >= up) { |
---|
451 | root = up; |
---|
452 | currentdiff = 0.0; |
---|
453 | delta = -1.0; |
---|
454 | } |
---|
455 | else { |
---|
456 | calculateEntropy(root, vals); |
---|
457 | // Normalise entropies |
---|
458 | vals.randEntropy = (vals.randEntropy - upvals.randEntropy) / |
---|
459 | randscale; |
---|
460 | vals.actEntropy = (vals.actEntropy - upvals.actEntropy) / |
---|
461 | randscale; |
---|
462 | currentdiff = vals.randEntropy - vals.actEntropy; |
---|
463 | |
---|
464 | if (currentdiff < FLOOR1) { |
---|
465 | currentdiff = FLOOR1; |
---|
466 | if (stepsize < 0) { |
---|
467 | // If we've hit the end and turned around we can't |
---|
468 | // have found any peaks |
---|
469 | bestdiff = currentdiff; |
---|
470 | bestroot = bot; |
---|
471 | bestpsum = botvals.avgProb; |
---|
472 | bestminprob = botvals.minProb; |
---|
473 | break; |
---|
474 | } |
---|
475 | } |
---|
476 | delta = currentdiff - lastdiff; |
---|
477 | } |
---|
478 | if (currentdiff > bestdiff) { |
---|
479 | bestdiff = currentdiff; |
---|
480 | bestroot = root; |
---|
481 | bestminprob = vals.minProb; |
---|
482 | bestpsum = vals.avgProb; |
---|
483 | } |
---|
484 | if (delta < 0) { |
---|
485 | if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) { |
---|
486 | break; |
---|
487 | } |
---|
488 | else { |
---|
489 | stepsize /= -4.0; |
---|
490 | } |
---|
491 | } |
---|
492 | if (itcount > ROOT_FINDER_MAX_ITER) { |
---|
493 | // System.err.println("Warning: "+debug+" ROOT_FINDER_MAX_ITER |
---|
494 | // exceeded"); |
---|
495 | break; |
---|
496 | } |
---|
497 | } // while |
---|
498 | |
---|
499 | m_SmallestProb = bestminprob; |
---|
500 | m_AverageProb = bestpsum; |
---|
501 | // Set the probability of transforming to a missing value |
---|
502 | switch ( m_MissingMode ) |
---|
503 | { |
---|
504 | case M_DELETE: |
---|
505 | m_MissingProb = 0.0; |
---|
506 | break; |
---|
507 | case M_NORMAL: |
---|
508 | m_MissingProb = 1.0; |
---|
509 | break; |
---|
510 | case M_MAXDIFF: |
---|
511 | m_MissingProb = m_SmallestProb; |
---|
512 | break; |
---|
513 | case M_AVERAGE: |
---|
514 | m_MissingProb = m_AverageProb; |
---|
515 | break; |
---|
516 | } |
---|
517 | // set scale factor |
---|
518 | scale = bestroot; |
---|
519 | } // else |
---|
520 | return scale; |
---|
521 | } |
---|
522 | |
---|
523 | /** |
---|
524 | * Calculates several parameters aside from the entropy: for a specified |
---|
525 | * scale factor, calculates the actual entropy, a random entropy using a |
---|
526 | * randomized set of class value colomns, and records the average and |
---|
527 | * smallest probabilities (for use in missing value case). |
---|
528 | */ |
---|
529 | private void calculateEntropy(double scale, KStarWrapper params) { |
---|
530 | String debug = "(KStarNumericAttribute.calculateEntropy)"; |
---|
531 | int i,j,k; |
---|
532 | double actent = 0.0, randent = 0.0; |
---|
533 | double pstar, tprob, avgprob = 0.0, minprob = 1.0; |
---|
534 | double actClassProb, randClassProb; |
---|
535 | double [][] pseudoClassProbs = new double[NUM_RAND_COLS+1][m_NumClasses]; |
---|
536 | // init |
---|
537 | for(j = 0; j <= NUM_RAND_COLS; j++) { |
---|
538 | for(i = 0; i < m_NumClasses; i++) { |
---|
539 | pseudoClassProbs[j][i] = 0.0; |
---|
540 | } |
---|
541 | } |
---|
542 | for (i=0; i < m_NumInstances; i++) { |
---|
543 | if (m_Distances[i] < 0) { |
---|
544 | // train instance has mising value |
---|
545 | continue; |
---|
546 | } |
---|
547 | else { |
---|
548 | pstar = PStar(m_Distances[i], scale); |
---|
549 | tprob = pstar / m_ActualCount; |
---|
550 | avgprob += tprob; |
---|
551 | if (pstar < minprob) { |
---|
552 | minprob = pstar; |
---|
553 | } |
---|
554 | // filter instances with same class value |
---|
555 | for (k=0; k <= NUM_RAND_COLS; k++) { |
---|
556 | // instance i is assigned a random class value in colomn k; |
---|
557 | // colomn k = NUM_RAND_COLS contains the original mapping: |
---|
558 | // instance -> class vlaue |
---|
559 | pseudoClassProbs[k][ m_RandClassCols[k][i] ] += tprob; |
---|
560 | } |
---|
561 | } |
---|
562 | } |
---|
563 | // compute the actual entropy using the class probabilities |
---|
564 | // with the original class value mapping (colomn NUM_RAND_COLS) |
---|
565 | for (j = m_NumClasses-1; j >= 0; j--) { |
---|
566 | actClassProb = pseudoClassProbs[NUM_RAND_COLS][j] / avgprob; |
---|
567 | if (actClassProb > 0) { |
---|
568 | actent -= actClassProb * Math.log(actClassProb) / LOG2; |
---|
569 | } |
---|
570 | } |
---|
571 | // compute a random entropy using the pseudo class probs |
---|
572 | // excluding the colomn NUM_RAND_COLS |
---|
573 | for (k=0; k < NUM_RAND_COLS; k++) { |
---|
574 | for (i = m_NumClasses-1; i >= 0; i--) { |
---|
575 | randClassProb = pseudoClassProbs[k][i] / avgprob; |
---|
576 | if (randClassProb > 0) { |
---|
577 | randent -= randClassProb * Math.log(randClassProb) / LOG2; |
---|
578 | } |
---|
579 | } |
---|
580 | } |
---|
581 | randent /= NUM_RAND_COLS; |
---|
582 | // return the values |
---|
583 | params.actEntropy = actent; |
---|
584 | params.randEntropy = randent; |
---|
585 | params.avgProb = avgprob; |
---|
586 | params.minProb = minprob; |
---|
587 | } |
---|
588 | |
---|
589 | /** |
---|
590 | * Calculates the value of P for a given value x using the expression: |
---|
591 | * P(x) = scale * exp( -2.0 * x * scale ) |
---|
592 | * |
---|
593 | * @param x input value |
---|
594 | * @param scale the scale factor |
---|
595 | * @return output of the function P(x) |
---|
596 | */ |
---|
597 | private double PStar(double x, double scale) { |
---|
598 | return scale * Math.exp( -2.0 * x * scale ); |
---|
599 | } |
---|
600 | |
---|
601 | /** |
---|
602 | * Set options. |
---|
603 | * @param missingmode the missing value treatment to use |
---|
604 | * @param blendmethod the blending method to use |
---|
605 | * @param blendfactor the level of blending to use |
---|
606 | */ |
---|
607 | public void setOptions(int missingmode, int blendmethod, int blendfactor) { |
---|
608 | m_MissingMode = missingmode; |
---|
609 | m_BlendMethod = blendmethod; |
---|
610 | m_BlendFactor = blendfactor; |
---|
611 | } |
---|
612 | |
---|
613 | /** |
---|
614 | * Set the missing value mode. |
---|
615 | * @param mode the type of missing value treatment to use |
---|
616 | */ |
---|
617 | public void setMissingMode(int mode) { |
---|
618 | m_MissingMode = mode; |
---|
619 | } |
---|
620 | |
---|
621 | /** |
---|
622 | * Set the blending method |
---|
623 | * @param method the blending method to use |
---|
624 | */ |
---|
625 | public void setBlendMethod(int method) { |
---|
626 | m_BlendMethod = method; |
---|
627 | } |
---|
628 | |
---|
629 | /** |
---|
630 | * Set the blending factor |
---|
631 | * @param factor the level of blending to use |
---|
632 | */ |
---|
633 | public void setBlendFactor(int factor) { |
---|
634 | m_BlendFactor = factor; |
---|
635 | } |
---|
636 | |
---|
637 | /** |
---|
638 | * Returns the revision string. |
---|
639 | * |
---|
640 | * @return the revision |
---|
641 | */ |
---|
642 | public String getRevision() { |
---|
643 | return RevisionUtils.extract("$Revision: 1.7 $"); |
---|
644 | } |
---|
645 | } // class |
---|