/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * KStarNominalAttribute.java
 * Copyright (C) 1995 University of Waikato
 * Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
 *
 */

package weka.classifiers.lazy.kstar;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

/**
 * A custom class which provides the environment for computing the
 * transformation probability of a specified test instance nominal
 * attribute to a specified train instance nominal attribute.
 *
 * @author Len Trigg (len@reeltwo.com)
 * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
 * @version $Revision: 1.7 $
 */
public class KStarNominalAttribute
  implements KStarConstants, RevisionHandler {

  /** The training instances used for classification. */
  protected Instances m_TrainSet;

  /** The test instance */
  protected Instance m_Test;

  /** The train instance */
  protected Instance m_Train;

  /** The index of the nominal attribute in the test and train instances */
  protected int m_AttrIndex;

  /** The stop parameter */
  protected double m_Stop = 1.0;

  /** Probability of test attribute transforming into train attribute
      with missing value */
  protected double m_MissingProb = 1.0;

  /** Average probability of test attribute transforming into train
      attribute */
  protected double m_AverageProb = 1.0;

  /** Smallest probability of test attribute transforming into
      train attribute */
  protected double m_SmallestProb = 1.0;

  /** Number of train instances with no missing attribute values */
  protected int m_TotalCount;

  /** Distribution of the attribute value in the train dataset */
  protected int [] m_Distribution;

  /** Set of columns: each column representing a randomised version
      of the train dataset class column */
  protected int [][] m_RandClassCols;

  /** A cache for storing attribute values and their corresponding
      stop parameters */
  protected KStarCache m_Cache;

  // KStar Global settings

  /** The number of instances in the dataset */
  protected int m_NumInstances;

  /** The number of class values */
  protected int m_NumClasses;

  /** The number of attributes */
  protected int m_NumAttributes;

  /** The class attribute type */
  protected int m_ClassType;

  /** Missing value treatment */
  protected int m_MissingMode = M_AVERAGE;

  /** B_SPHERE = use specified blend, B_ENTROPY = entropic blend setting */
  protected int m_BlendMethod = B_SPHERE;

  /** Default sphere of influence blend setting */
  protected int m_BlendFactor = 20;

  /**
   * Constructor
   */
  public KStarNominalAttribute(Instance test, Instance train, int attrIndex,
                               Instances trainSet, int [][] randClassCol,
                               KStarCache cache)
  {
    m_Test = test;
    m_Train = train;
    m_AttrIndex = attrIndex;
    m_TrainSet = trainSet;
    m_RandClassCols = randClassCol;
    m_Cache = cache;
    init();
  }
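
  // Illustrative usage sketch (not part of the original source): KStar itself
  // constructs and drives this class, so the variables below are hypothetical.
  //
  //   KStarNominalAttribute kna = new KStarNominalAttribute(test, train,
  //       attrIndex, trainSet, randClassCols, cache);
  //   kna.setOptions(M_AVERAGE, B_SPHERE, 20);
  //   double p = kna.transProb();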

  /**
   * Initializes the m_Attributes of the class.
   */
  private void init() {
    try {
      m_NumInstances = m_TrainSet.numInstances();
      m_NumClasses = m_TrainSet.numClasses();
      m_NumAttributes = m_TrainSet.numAttributes();
      m_ClassType = m_TrainSet.classAttribute().type();
    } catch(Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Calculates the probability of the indexed nominal attribute of the test
   * instance transforming into the indexed nominal attribute of the training
   * instance.
   *
   * @return the value of the transformation probability.
   */
  public double transProb() {
    String debug = "(KStarNominalAttribute.transProb) ";
    double transProb = 0.0;
    // check if the attribute value has been encountered before
    // in which case it should be in the nominal cache
    if (m_Cache.containsKey(m_Test.value(m_AttrIndex))) {
      KStarCache.TableEntry te =
        m_Cache.getCacheValues(m_Test.value(m_AttrIndex));
      m_Stop = te.value;
      m_MissingProb = te.pmiss;
    }
    else {
      generateAttrDistribution();
      // we have to compute the parameters
      if (m_BlendMethod == B_ENTROPY) {
        m_Stop = stopProbUsingEntropy();
      }
      else { // default is B_SPHERE
        m_Stop = stopProbUsingBlend();
      }
      // store the values in cache
      m_Cache.store( m_Test.value(m_AttrIndex), m_Stop, m_MissingProb );
    }
    // we've got our m_Stop, then what?
    if (m_Train.isMissing(m_AttrIndex)) {
      transProb = m_MissingProb;
    }
    else {
      try {
        transProb = (1.0 - m_Stop) / m_Test.attribute(m_AttrIndex).numValues();
        if ( (int)m_Test.value(m_AttrIndex) ==
             (int)m_Train.value(m_AttrIndex) )
        {
          transProb += m_Stop;
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return transProb;
  }
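
  // Worked example with illustrative numbers (not from the source): for a
  // three-valued attribute with m_Stop = 0.7, each non-matching train value
  // gets (1 - 0.7) / 3 = 0.1 and a matching value gets 0.1 + 0.7 = 0.8, so
  // the probabilities over the three possible values sum to 1.0.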

  /**
   * Calculates the "stop parameter" for this attribute using
   * the entropy method: the value is computed using a root finder
   * algorithm. The method takes advantage of the calculation to
   * compute the smallest and average transformation probabilities
   * once the stop factor is obtained. It also sets the transformation
   * probability to an attribute with a missing value.
   *
   * @return the value of the stop parameter.
   *
   */
  private double stopProbUsingEntropy() {
    String debug = "(KStarNominalAttribute.stopProbUsingEntropy)";
    if ( m_ClassType != Attribute.NOMINAL ) {
      System.err.println("Error: "+debug+" attribute class must be nominal!");
      System.exit(1);
    }
    int itcount = 0;
    double stopProb;
    double lower, upper, pstop;
    double bestminprob = 0.0, bestpsum = 0.0;
    double bestdiff = 0.0, bestpstop = 0.0;
    double currentdiff, lastdiff, stepsize, delta;

    KStarWrapper botvals = new KStarWrapper();
    KStarWrapper upvals = new KStarWrapper();
    KStarWrapper vals = new KStarWrapper();

    // Initial values for root finder
    lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
    upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;

    // Find (approx) entropy ranges
    calculateEntropy(upper, upvals);
    calculateEntropy(lower, botvals);

    if (upvals.avgProb == 0) {
      // When there are no training instances with the test value:
      // doesn't matter what exact value we use for pstop, just acts as
      // a constant scale factor in this case.
      calculateEntropy(lower, vals);
    }
    else {
      // Optimise the scale factor
      if ( (upvals.randEntropy - upvals.actEntropy <
            botvals.randEntropy - botvals.actEntropy) &&
           (botvals.randEntropy - botvals.actEntropy > FLOOR) )
      {
        bestpstop = pstop = lower;
        stepsize = INITIAL_STEP;
        bestminprob = botvals.minProb;
        bestpsum = botvals.avgProb;
      }
      else {
        bestpstop = pstop = upper;
        stepsize = -INITIAL_STEP;
        bestminprob = upvals.minProb;
        bestpsum = upvals.avgProb;
      }
      bestdiff = currentdiff = FLOOR;
      itcount = 0;
      /* Enter the root finder */
      while (true)
      {
        itcount++;
        lastdiff = currentdiff;
        pstop += stepsize;
        if (pstop <= lower) {
          pstop = lower;
          currentdiff = 0.0;
          delta = -1.0;
        }
        else if (pstop >= upper) {
          pstop = upper;
          currentdiff = 0.0;
          delta = -1.0;
        }
        else {
          calculateEntropy(pstop, vals);
          currentdiff = vals.randEntropy - vals.actEntropy;

          if (currentdiff < FLOOR) {
            currentdiff = FLOOR;
            if ((Math.abs(stepsize) < INITIAL_STEP) &&
                (bestdiff == FLOOR)) {
              bestpstop = lower;
              bestminprob = botvals.minProb;
              bestpsum = botvals.avgProb;
              break;
            }
          }
          delta = currentdiff - lastdiff;
        }
        if (currentdiff > bestdiff) {
          bestdiff = currentdiff;
          bestpstop = pstop;
          bestminprob = vals.minProb;
          bestpsum = vals.avgProb;
        }
        if (delta < 0) {
          if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
            break;
          }
          else {
            stepsize /= -2.0;
          }
        }
        if (itcount > ROOT_FINDER_MAX_ITER) {
          break;
        }
      }
    }

    m_SmallestProb = bestminprob;
    m_AverageProb = bestpsum;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode )
    {
      case M_DELETE:
        m_MissingProb = 0.0;
        break;
      case M_NORMAL:
        m_MissingProb = 1.0;
        break;
      case M_MAXDIFF:
        m_MissingProb = m_SmallestProb;
        break;
      case M_AVERAGE:
        m_MissingProb = m_AverageProb;
        break;
    }

    if ( Math.abs(bestpsum - (double)m_TotalCount) < EPSILON) {
      // No difference in the values
      stopProb = 1.0;
    }
    else {
      stopProb = bestpstop;
    }
    return stopProb;
  }
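
  // Note on the search above: it starts (roughly) at whichever border of
  // [lower, upper] shows the larger entropy gap (randEntropy - actEntropy)
  // and hill-climbs pstop, halving and reversing the step (stepsize /= -2.0)
  // whenever the gap stops improving, until |stepsize| falls below
  // ROOT_FINDER_ACCURACY or ROOT_FINDER_MAX_ITER iterations are exceeded.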

  /**
   * Calculates the entropy of the actual class prediction
   * and the entropy for random class prediction. It also
   * calculates the smallest and average transformation probabilities.
   *
   * @param stop the stop parameter
   * @param params the object wrapper for the parameters:
   * actual entropy, random entropy, average probability and smallest
   * probability.
   * @return the values are returned in the object "params".
   *
   */
  private void calculateEntropy(double stop, KStarWrapper params) {
    String debug = "(KStarNominalAttribute.calculateEntropy)";
    int i,j,k;
    Instance train;
    double actent = 0.0, randent=0.0;
    double pstar, tprob, psum=0.0, minprob=1.0;
    double actClassProb, randClassProb;
    double [][] pseudoClassProb = new double[NUM_RAND_COLS+1][m_NumClasses];
    // init ...
    for(j = 0; j <= NUM_RAND_COLS; j++) {
      for(i = 0; i < m_NumClasses; i++) {
        pseudoClassProb[j][i] = 0.0;
      }
    }
    for (i=0; i < m_NumInstances; i++) {
      train = m_TrainSet.instance(i);
      if (!train.isMissing(m_AttrIndex)) {
        pstar = PStar(m_Test, train, m_AttrIndex, stop);
        tprob = pstar / m_TotalCount;
        if (pstar < minprob) {
          minprob = pstar;
        }
        psum += tprob;
        // filter instances with same class value
        for (k=0 ; k <= NUM_RAND_COLS ; k++) {
          // instance i is assigned a random class value in column k;
          // column k = NUM_RAND_COLS contains the original mapping:
          // instance -> class value
          pseudoClassProb[k][ m_RandClassCols[k][i] ] += tprob;
        }
      }
    }
    // compute the actual entropy using the class probs
    // with the original class value mapping (column NUM_RAND_COLS)
    for (j=m_NumClasses-1; j>=0; j--) {
      actClassProb = pseudoClassProb[NUM_RAND_COLS][j] / psum;
      if (actClassProb > 0) {
        actent -= actClassProb * Math.log(actClassProb) / LOG2;
      }
    }
    // compute a random entropy using the pseudo class probs
    // excluding the column NUM_RAND_COLS
    for (k=0; k < NUM_RAND_COLS; k++) {
      for (i = m_NumClasses-1; i >= 0; i--) {
        randClassProb = pseudoClassProb[k][i] / psum;
        if (randClassProb > 0) {
          randent -= randClassProb * Math.log(randClassProb) / LOG2;
        }
      }
    }
    randent /= NUM_RAND_COLS;
    // return the results ... Yuk !!!
    params.actEntropy = actent;
    params.randEntropy = randent;
    params.avgProb = psum;
    params.minProb = minprob;
  }
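
  // The column trick above: column NUM_RAND_COLS of m_RandClassCols holds the
  // true instance-to-class mapping, while columns 0..NUM_RAND_COLS-1 hold
  // randomised class assignments. actent therefore measures how well the
  // transformation probabilities predict the real classes, randent (averaged
  // over the randomised columns) gives a chance baseline, and
  // stopProbUsingEntropy() searches for the stop value maximising the gap.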

  /**
   * Calculates the "stop parameter" for this attribute using
   * the blend method: the value is computed using a root finder
   * algorithm. The method takes advantage of this calculation to
   * compute the smallest and average transformation probabilities
   * once the stop factor is obtained. It also sets the transformation
   * probability to an attribute with a missing value.
   *
   * @return the value of the stop parameter.
   *
   */
  private double stopProbUsingBlend() {
    String debug = "(KStarNominalAttribute.stopProbUsingBlend) ";
    int itcount = 0;
    double stopProb, aimfor;
    double lower, upper, tstop;

    KStarWrapper botvals = new KStarWrapper();
    KStarWrapper upvals = new KStarWrapper();
    KStarWrapper vals = new KStarWrapper();

    int testvalue = (int)m_Test.value(m_AttrIndex);
    aimfor = (m_TotalCount - m_Distribution[testvalue]) *
      (double)m_BlendFactor / 100.0 + m_Distribution[testvalue];

    // Initial values for root finder
    tstop = 1.0 - (double)m_BlendFactor / 100.0;
    lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
    upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;

    // Find out function border values
    calculateSphereSize(testvalue, lower, botvals);
    botvals.sphere -= aimfor;
    calculateSphereSize(testvalue, upper, upvals);
    upvals.sphere -= aimfor;

    if (upvals.avgProb == 0) {
      // When there are no training instances with the test value:
      // doesn't matter what exact value we use for tstop, just acts as
      // a constant scale factor in this case.
      calculateSphereSize(testvalue, tstop, vals);
    }
    else if (upvals.sphere > 0) {
      // Can't include aimfor instances, going for min possible
      tstop = upper;
      vals.avgProb = upvals.avgProb;
    }
    else {
      // Enter the root finder
      for (;;) {
        itcount++;
        calculateSphereSize(testvalue, tstop, vals);
        vals.sphere -= aimfor;
        if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ||
             itcount >= ROOT_FINDER_MAX_ITER )
        {
          break;
        }
        if (vals.sphere > 0.0) {
          lower = tstop;
          tstop = (upper + lower) / 2.0;
        }
        else {
          upper = tstop;
          tstop = (upper + lower) / 2.0;
        }
      }
    }

    m_SmallestProb = vals.minProb;
    m_AverageProb = vals.avgProb;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode )
    {
      case M_DELETE:
        m_MissingProb = 0.0;
        break;
      case M_NORMAL:
        m_MissingProb = 1.0;
        break;
      case M_MAXDIFF:
        m_MissingProb = m_SmallestProb;
        break;
      case M_AVERAGE:
        m_MissingProb = m_AverageProb;
        break;
    }

    if ( Math.abs(vals.avgProb - m_TotalCount) < EPSILON) {
      // No difference in the values
      stopProb = 1.0;
    }
    else {
      stopProb = tstop;
    }
    return stopProb;
  }
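
  // Example with illustrative numbers (not from the source): with
  // m_BlendFactor = 20, m_TotalCount = 100 and 40 training instances sharing
  // the test value, aimfor = (100 - 40) * 0.20 + 40 = 52, so the bisection
  // above looks for the stop value whose sphere of influence covers the
  // matching instances plus 20% of the remainder.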

  /**
   * Calculates the size of the "sphere of influence" defined as:
   * sphere = sum(P)^2 / sum(P^2)
   * where P(i|j) = (1-tstop)*P(i) + ((i==j)?tstop:0).
   * This method takes advantage of the calculation to compute the values of
   * the "smallest" and "average" transformation probabilities when using
   * the specified stop parameter.
   *
   * @param testvalue the value of the test instance
   * @param stop the stop parameter
   * @param params a wrapper of the parameters to be computed:
   * "sphere" the sphere size
   * "avgProb" the average transformation probability
   * "minProb" the smallest transformation probability
   * @return the values are returned in the "params" object.
   *
   */
  private void calculateSphereSize(int testvalue, double stop,
                                   KStarWrapper params) {
    String debug = "(KStarNominalAttribute.calculateSphereSize) ";
    int i, thiscount;
    double tprob, tval = 0.0, t1 = 0.0;
    double sphere, minprob = 1.0, transprob = 0.0;

    for(i = 0; i < m_Distribution.length; i++) {
      thiscount = m_Distribution[i];
      if ( thiscount != 0 ) {
        if ( testvalue == i ) {
          tprob = (stop + (1 - stop) / m_Distribution.length) / m_TotalCount;
          tval += tprob * thiscount;
          t1 += tprob * tprob * thiscount;
        }
        else {
          tprob = ((1 - stop) / m_Distribution.length) / m_TotalCount;
          tval += tprob * thiscount;
          t1 += tprob * tprob * thiscount;
        }
        if ( minprob > tprob * m_TotalCount ) {
          minprob = tprob * m_TotalCount;
        }
      }
    }
    transprob = tval;
    sphere = (t1 == 0) ? 0 : ((tval * tval) / t1);
    // return values ... Yck!!!
    params.sphere = sphere;
    params.avgProb = transprob;
    params.minProb = minprob;
  }
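
  // Sanity check on the formula above: with a uniform transformation
  // probability p over n instances, sum(P)^2 / sum(P^2) = (n*p)^2 / (n*p^2)
  // = n, so the sphere size can be read as the effective number of training
  // instances covered by the given stop parameter.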

  /**
   * Calculates the nominal probability function defined as:
   * P(i|j) = (1-stop) * P(i) + ((i==j) ? stop : 0)
   * In this case, it calculates the transformation probability of the
   * indexed test attribute to the indexed train attribute.
   *
   * @param test the test instance
   * @param train the train instance
   * @param col the attribute index
   * @param stop the stop parameter
   * @return the value of the transformation probability.
   *
   */
  private double PStar(Instance test, Instance train, int col, double stop) {
    String debug = "(KStarNominalAttribute.PStar) ";
    double pstar;
    int numvalues = 0;
    try {
      numvalues = test.attribute(col).numValues();
    } catch (Exception ex) {
      ex.printStackTrace();
    }
    if ( (int)test.value(col) == (int)train.value(col) ) {
      pstar = stop + (1 - stop) / numvalues;
    }
    else {
      pstar = (1 - stop) / numvalues;
    }
    return pstar;
  }
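
  // Note: PStar() computes the same quantity as the non-missing branch of
  // transProb(), but with the stop value passed in explicitly so the root
  // finders can evaluate candidate stop parameters; calculateEntropy() then
  // normalises the result by m_TotalCount.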

  /**
   * Calculates the distribution, in the dataset, of the indexed nominal
   * attribute values. It also counts the actual number of training instances
   * that contributed (those with non-missing values) to calculate the
   * distribution.
   */
  private void generateAttrDistribution() {
    String debug = "(KStarNominalAttribute.generateAttrDistribution)";
    m_Distribution = new int[ m_TrainSet.attribute(m_AttrIndex).numValues() ];
    int i;
    Instance train;
    for (i=0; i < m_NumInstances; i++) {
      train = m_TrainSet.instance(i);
      if ( !train.isMissing(m_AttrIndex) ) {
        m_TotalCount++;
        m_Distribution[(int)train.value(m_AttrIndex)]++;
      }
    }
  }

  /**
   * Sets the options.
   *
   * @param missingmode the missing value treatment to use
   * @param blendmethod the blend method to use
   * @param blendfactor the blend factor to use
   */
  public void setOptions(int missingmode, int blendmethod, int blendfactor) {
    m_MissingMode = missingmode;
    m_BlendMethod = blendmethod;
    m_BlendFactor = blendfactor;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.7 $");
  }
} // class