source: src/main/java/weka/classifiers/bayes/AODEsr.java @ 11

Last change on this file since 11 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 27.6 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AODEsr.java
19 *    Copyright (C) 2007
20 *    Algorithm developed by: Fei ZHENG and Geoff Webb
21 *    Code written by: Fei ZHENG and Janice Boughton
22 */
23
24package weka.classifiers.bayes;
25
26import weka.classifiers.Classifier;
27import weka.classifiers.AbstractClassifier;
28import weka.classifiers.UpdateableClassifier;
29import weka.core.Capabilities;
30import weka.core.Instance;
31import weka.core.Instances;
32import weka.core.Option;
33import weka.core.OptionHandler;
34import weka.core.RevisionUtils;
35import weka.core.TechnicalInformation;
36import weka.core.TechnicalInformationHandler;
37import weka.core.Utils;
38import weka.core.WeightedInstancesHandler;
39import weka.core.Capabilities.Capability;
40import weka.core.TechnicalInformation.Field;
41import weka.core.TechnicalInformation.Type;
42
43import java.util.Enumeration;
44import java.util.Vector;
45
46/**
47 *
48 <!-- globalinfo-start -->
49 * AODEsr augments AODE with Subsumption Resolution.AODEsr detects specializations between two attribute values at classification time and deletes the generalization attribute value.<br/>
50 * For more information, see:<br/>
51 * Fei Zheng, Geoffrey I. Webb: Efficient Lazy Elimination for Averaged-One Dependence Estimators. In: Proceedings of the Twenty-third International Conference on Machine  Learning (ICML 2006), 1113-1120, 2006.
52 * <p/>
53 <!-- globalinfo-end -->
54 *
55 <!-- technical-bibtex-start -->
56 * BibTeX:
57 * <pre>
58 * &#64;inproceedings{Zheng2006,
59 *    author = {Fei Zheng and Geoffrey I. Webb},
60 *    booktitle = {Proceedings of the Twenty-third International Conference on Machine  Learning (ICML 2006)},
61 *    pages = {1113-1120},
62 *    publisher = {ACM Press},
63 *    title = {Efficient Lazy Elimination for Averaged-One Dependence Estimators},
64 *    year = {2006},
65 *    ISBN = {1-59593-383-2}
66 * }
67 * </pre>
68 * <p/>
69 <!-- technical-bibtex-end -->
70 *
71 <!-- options-start -->
72 * Valid options are: <p/>
73 *
74 * <pre> -D
75 *  Output debugging information
76 * </pre>
77 *
78 * <pre> -C
79 *  Impose a critcal value for specialization-generalization relationship
80 *  (default is 50)</pre>
81 *
82 * <pre> -F
83 *  Impose a frequency limit for superParents
84 *  (default is 1)</pre>
85 *
86 * <pre> -L
87 *  Using Laplace estimation
88 *  (default is m-esimation (m=1))</pre>
89 *
90 * <pre> -M
91 *  Weight value for m-estimation
92 *  (default is 1.0)</pre>
93 *
94 <!-- options-end -->
95 *
96 * @author Fei Zheng
97 * @author Janice Boughton
98 * @version $Revision: 5928 $
99 */
100public class AODEsr extends AbstractClassifier
101    implements OptionHandler, WeightedInstancesHandler, UpdateableClassifier,
102               TechnicalInformationHandler {
103
104  /** for serialization */
105  static final long serialVersionUID = 5602143019183068848L;
106
107  /**
108   * 3D array (m_NumClasses * m_TotalAttValues * m_TotalAttValues)
109   * of attribute counts, i.e. the number of times an attribute value occurs
110   * in conjunction with another attribute value and a class value. 
111   */
112  private double [][][] m_CondiCounts;
113 
114  /**
115   * 2D array (m_TotalAttValues * m_TotalAttValues) of attribute counts.
116   * Similar to m_CondiCounts, but ignoring the class value.
117   */ 
118  private double [][] m_CondiCountsNoClass; 
119   
120  /** The number of times each class value occurs in the dataset */
121  private double [] m_ClassCounts;
122   
123  /** The sums of attribute-class counts 
124   *    -- if there are no missing values for att, then
125   *       m_SumForCounts[classVal][att] will be the same as
126   *       m_ClassCounts[classVal]
127   */
128  private double [][] m_SumForCounts;
129
130  /** The number of classes */
131  private int m_NumClasses;
132 
133  /** The number of attributes in dataset, including class */
134  private int m_NumAttributes;
135   
136  /** The number of instances in the dataset */
137  private int m_NumInstances;
138   
139  /** The index of the class attribute */
140  private int m_ClassIndex;
141   
142  /** The dataset */
143  private Instances m_Instances;
144   
145  /**
146   * The total number of values (including an extra for each attribute's
147   * missing value, which are included in m_CondiCounts) for all attributes
148   * (not including class).  Eg. for three atts each with two possible values,
149   * m_TotalAttValues would be 9 (6 values + 3 missing).
150   * This variable is used when allocating space for m_CondiCounts matrix.
151   */
152  private int m_TotalAttValues;
153   
154  /** The starting index (in the m_CondiCounts matrix) of the values for each attribute */
155  private int [] m_StartAttIndex;
156   
157  /** The number of values for each attribute */
158  private int [] m_NumAttValues;
159   
160  /** The frequency of each attribute value for the dataset */
161  private double [] m_Frequencies;
162
163  /** The number of valid class values observed in dataset
164   *  -- with no missing classes, this number is the same as m_NumInstances.
165   */
166  private double m_SumInstances;
167
168  /** An att's frequency must be this value or more to be a superParent */
169  private int m_Limit = 1;
170
171  /** If true, outputs debugging info */
172  private boolean m_Debug = false;
173 
174  /** m value for m-estimation */
175  protected  double m_MWeight = 1.0;
176 
177  /** Whether Laplace estimation is used instead of m-estimation */
178  private boolean m_Laplace = false;
179 
180  /** the critical value for the specialization-generalization */
181  private int m_Critical = 50;
182
183 
184  /**
185   * Returns a string describing this classifier
186   * @return a description of the classifier suitable for
187   * displaying in the explorer/experimenter gui
188   */
189  public String globalInfo() {
190
191    return "AODEsr augments AODE with Subsumption Resolution."
192      +"AODEsr detects specializations between two attribute "
193      +"values at classification time and deletes the generalization "
194      +"attribute value.\n"
195      +"For more information, see:\n"
196      + getTechnicalInformation().toString();
197  }
198 
199  /**
200   * Returns an instance of a TechnicalInformation object, containing
201   * detailed information about the technical background of this class,
202   * e.g., paper reference or book this class is based on.
203   *
204   * @return the technical information about this class
205   */
206  public TechnicalInformation getTechnicalInformation() {
207    TechnicalInformation        result;
208
209    result = new TechnicalInformation(Type.INPROCEEDINGS);
210    result.setValue(Field.AUTHOR, "Fei Zheng and Geoffrey I. Webb");
211    result.setValue(Field.YEAR, "2006");
212    result.setValue(Field.TITLE, "Efficient Lazy Elimination for Averaged-One Dependence Estimators");
213    result.setValue(Field.PAGES, "1113-1120");
214    result.setValue(Field.BOOKTITLE, "Proceedings of the Twenty-third International Conference on Machine  Learning (ICML 2006)");
215    result.setValue(Field.PUBLISHER, "ACM Press");
216    result.setValue(Field.ISBN, "1-59593-383-2");
217
218    return result;
219  }
220
221 /**
222  * Returns default capabilities of the classifier.
223  *
224  * @return      the capabilities of this classifier
225  */
226  public Capabilities getCapabilities() {
227    Capabilities result = super.getCapabilities();
228    result.disableAll();
229
230    // attributes
231    result.enable(Capability.NOMINAL_ATTRIBUTES);
232    result.enable(Capability.MISSING_VALUES);
233
234    // class
235    result.enable(Capability.NOMINAL_CLASS);
236    result.enable(Capability.MISSING_CLASS_VALUES);
237
238    // instances
239    result.setMinimumNumberInstances(0);
240
241    return result;
242  }
243
244  /**
245   * Generates the classifier.
246   *
247   * @param instances set of instances serving as training data
248   * @throws Exception if the classifier has not been generated
249   * successfully
250   */
251  public void buildClassifier(Instances instances) throws Exception {
252
253    // can classifier handle the data?
254    getCapabilities().testWithFail(instances);
255
256    // remove instances with missing class
257    m_Instances = new Instances(instances);
258    m_Instances.deleteWithMissingClass();
259
260    // reset variable for this fold
261    m_SumInstances = 0;
262    m_ClassIndex = instances.classIndex();
263    m_NumInstances = m_Instances.numInstances();
264    m_NumAttributes = instances.numAttributes();
265    m_NumClasses = instances.numClasses();
266
267    // allocate space for attribute reference arrays
268    m_StartAttIndex = new int[m_NumAttributes];
269    m_NumAttValues = new int[m_NumAttributes];
270 
271    m_TotalAttValues = 0;
272    for(int i = 0; i < m_NumAttributes; i++) {
273       if(i != m_ClassIndex) {
274          m_StartAttIndex[i] = m_TotalAttValues;
275          m_NumAttValues[i] = m_Instances.attribute(i).numValues();
276          m_TotalAttValues += m_NumAttValues[i] + 1;
277          // + 1 so room for missing value count
278       } else {
279          // m_StartAttIndex[i] = -1;  // class isn't included
280          m_NumAttValues[i] = m_NumClasses;
281       }
282    }
283
284    // allocate space for counts and frequencies
285    m_CondiCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];
286    m_ClassCounts = new double[m_NumClasses];
287    m_SumForCounts = new double[m_NumClasses][m_NumAttributes];
288    m_Frequencies = new double[m_TotalAttValues];
289    m_CondiCountsNoClass = new double[m_TotalAttValues][m_TotalAttValues];
290   
291    // calculate the counts
292    for(int k = 0; k < m_NumInstances; k++) {
293       addToCounts((Instance)m_Instances.instance(k));
294    }
295
296    // free up some space
297    m_Instances = new Instances(m_Instances, 0);
298  }
299 
300
301  /**
302   * Updates the classifier with the given instance.
303   *
304   * @param instance the new training instance to include in the model
305   * @throws Exception if the instance could not be incorporated in
306   * the model.
307   */
308  public void updateClassifier(Instance instance) {
309    this.addToCounts(instance);
310  }
311
312  /**
313   * Puts an instance's values into m_CondiCounts, m_ClassCounts and
314   * m_SumInstances.
315   *
316   * @param instance the instance whose values are to be put into the
317   *                 counts variables
318   */
319  private void addToCounts(Instance instance) {
320 
321    double [] countsPointer;
322    double [] countsNoClassPointer;
323 
324    if(instance.classIsMissing())
325       return;   // ignore instances with missing class
326
327    int classVal = (int)instance.classValue();
328    double weight = instance.weight();
329 
330    m_ClassCounts[classVal] += weight;
331    m_SumInstances += weight;
332   
333    // store instance's att val indexes in an array, b/c accessing it
334    // in loop(s) is more efficient
335    int [] attIndex = new int[m_NumAttributes];
336    for(int i = 0; i < m_NumAttributes; i++) {
337       if(i == m_ClassIndex)
338          attIndex[i] = -1;  // we don't use the class attribute in counts
339       else {
340          if(instance.isMissing(i))
341             attIndex[i] = m_StartAttIndex[i] + m_NumAttValues[i];
342          else
343             attIndex[i] = m_StartAttIndex[i] + (int)instance.value(i);
344       }
345    }
346
347    for(int Att1 = 0; Att1 < m_NumAttributes; Att1++) {
348       if(attIndex[Att1] == -1)
349          continue;   // avoid pointless looping as Att1 is currently the class attribute
350
351       m_Frequencies[attIndex[Att1]] += weight;
352       
353       // if this is a missing value, we don't want to increase sumforcounts
354       if(!instance.isMissing(Att1))
355          m_SumForCounts[classVal][Att1] += weight;
356
357       // save time by referencing this now, rather than repeatedly in the loop
358       countsPointer = m_CondiCounts[classVal][attIndex[Att1]];
359       countsNoClassPointer = m_CondiCountsNoClass[attIndex[Att1]];
360
361       for(int Att2 = 0; Att2 < m_NumAttributes; Att2++) {
362          if(attIndex[Att2] != -1) {
363             countsPointer[attIndex[Att2]] += weight;
364             countsNoClassPointer[attIndex[Att2]] += weight;
365          }
366       }
367    }
368  }
369 
370 
371  /**
372   * Calculates the class membership probabilities for the given test
373   * instance.
374   *
375   * @param instance the instance to be classified
376   * @return predicted class probability distribution
377   * @throws Exception if there is a problem generating the prediction
378   */
379  public double [] distributionForInstance(Instance instance) throws Exception {
380
381    // accumulates posterior probabilities for each class
382    double [] probs = new double[m_NumClasses];
383
384    // index for parent attribute value, and a count of parents used
385    int pIndex, parentCount; 
386
387    int [] SpecialGeneralArray = new int[m_NumAttributes];
388   
389    // pointers for efficiency
390    double [][] countsForClass;
391    double [] countsForClassParent;
392    double [] countsForAtti;
393    double [] countsForAttj;
394
395    // store instance's att values in an int array, so accessing them
396    // is more efficient in loop(s).
397    int [] attIndex = new int[m_NumAttributes];
398    for(int att = 0; att < m_NumAttributes; att++) {
399       if(instance.isMissing(att) || att == m_ClassIndex)
400          attIndex[att] = -1; // can't use class & missing vals in calculations
401       else
402          attIndex[att] = m_StartAttIndex[att] + (int)instance.value(att);
403    }
404    // -1 indicates attribute is not a generalization of any other attributes
405    for(int i = 0; i < m_NumAttributes; i++) {
406       SpecialGeneralArray[i] = -1;
407    }
408
409    // calculate the specialization-generalization array
410    for(int i = 0; i < m_NumAttributes; i++){
411       // skip i if it's the class or is missing
412       if(attIndex[i] == -1)  continue;
413       countsForAtti = m_CondiCountsNoClass[attIndex[i]];
414 
415       for(int j = 0; j < m_NumAttributes; j++) {
416          // skip j if it's the class, missing, is i or a generalization of i
417          if((attIndex[j] == -1) || (i == j) || (SpecialGeneralArray[j] == i))
418            continue;
419         
420          countsForAttj = m_CondiCountsNoClass[attIndex[j]];
421
422          // check j's frequency is above critical value
423          if(countsForAttj[attIndex[j]] > m_Critical) {
424
425             // skip j if the frequency of i and j together is not equivalent
426             // to the frequency of j alone
427             if(countsForAttj[attIndex[j]] == countsForAtti[attIndex[j]]) {
428
429             // if attributes i and j are both a specialization of each other
430             // avoid deleting both by skipping j
431                if((countsForAttj[attIndex[j]] == countsForAtti[attIndex[i]])
432                 && (i < j)){
433                  continue;
434                } else {
435                    // set the specialization relationship
436                    SpecialGeneralArray[i] = j;
437                    break; // break out of j loop because a specialization has been found
438                }
439             }
440          }
441       }
442    }
443 
444    // calculate probabilities for each possible class value
445    for(int classVal = 0; classVal < m_NumClasses; classVal++) {
446 
447       probs[classVal] = 0;
448       double x = 0;
449       parentCount = 0;
450 
451       countsForClass = m_CondiCounts[classVal];
452
453       // each attribute has a turn of being the parent
454       for(int parent = 0; parent < m_NumAttributes; parent++) {
455          if(attIndex[parent] == -1)
456             continue;  // skip class attribute or missing value
457
458          // determine correct index for the parent in m_CondiCounts matrix
459          pIndex = attIndex[parent];
460
461          // check that the att value has a frequency of m_Limit or greater
462          if(m_Frequencies[pIndex] < m_Limit) 
463             continue;
464         
465          // delete the generalization attributes.
466          if(SpecialGeneralArray[parent] != -1)
467             continue;
468
469          countsForClassParent = countsForClass[pIndex];
470
471          // block the parent from being its own child
472          attIndex[parent] = -1;
473
474          parentCount++;
475
476          double classparentfreq = countsForClassParent[pIndex];
477
478          // find the number of missing values for parent's attribute
479          double missing4ParentAtt = 
480            m_Frequencies[m_StartAttIndex[parent] + m_NumAttValues[parent]];
481
482          // calculate the prior probability -- P(parent & classVal)
483           if (m_Laplace){
484             x = LaplaceEstimate(classparentfreq, m_SumInstances - missing4ParentAtt, 
485                                    m_NumClasses * m_NumAttValues[parent]);
486          } else {
487         
488             x = MEstimate(classparentfreq, m_SumInstances - missing4ParentAtt, 
489                                    m_NumClasses * m_NumAttValues[parent]);
490          }
491
492
493   
494          // take into account the value of each attribute
495          for(int att = 0; att < m_NumAttributes; att++) {
496             if(attIndex[att] == -1) // skip class attribute or missing value
497                continue;
498             // delete the generalization attributes.
499             if(SpecialGeneralArray[att] != -1)
500                continue;
501           
502 
503             double missingForParentandChildAtt = 
504                      countsForClassParent[m_StartAttIndex[att] + m_NumAttValues[att]];
505
506             if (m_Laplace){
507                x *= LaplaceEstimate(countsForClassParent[attIndex[att]], 
508                    classparentfreq - missingForParentandChildAtt, m_NumAttValues[att]);
509             } else {
510                x *= MEstimate(countsForClassParent[attIndex[att]], 
511                    classparentfreq - missingForParentandChildAtt, m_NumAttValues[att]);
512             }
513          }
514
515          // add this probability to the overall probability
516          probs[classVal] += x;
517 
518          // unblock the parent
519          attIndex[parent] = pIndex;
520       }
521 
522       // check that at least one att was a parent
523       if(parentCount < 1) {
524
525          // do plain naive bayes conditional prob
526          probs[classVal] = NBconditionalProb(instance, classVal);
527          //probs[classVal] = Double.NaN;
528
529       } else {
530 
531          // divide by number of parent atts to get the mean
532          probs[classVal] /= (double)(parentCount);
533       }
534    }
535    Utils.normalize(probs);
536    return probs;
537  }
538
539
540  /**
541   * Calculates the probability of the specified class for the given test
542   * instance, using naive Bayes.
543   *
544   * @param instance the instance to be classified
545   * @param classVal the class for which to calculate the probability
546   * @return predicted class probability
547   * @throws Exception if there is a problem generating the prediction
548   */
549  public double NBconditionalProb(Instance instance, int classVal)
550                                                     throws Exception {
551    double prob;
552    int attIndex;
553    double [][] pointer;
554
555    // calculate the prior probability
556    if(m_Laplace) {
557       prob = LaplaceEstimate(m_ClassCounts[classVal],m_SumInstances,m_NumClasses); 
558    } else {
559       prob = MEstimate(m_ClassCounts[classVal], m_SumInstances, m_NumClasses);
560    }
561    pointer = m_CondiCounts[classVal];
562   
563    // consider effect of each att value
564    for(int att = 0; att < m_NumAttributes; att++) {
565       if(att == m_ClassIndex || instance.isMissing(att))
566          continue;
567       
568       // determine correct index for att in m_CondiCounts
569       attIndex = m_StartAttIndex[att] + (int)instance.value(att);
570       if (m_Laplace){
571         prob *= LaplaceEstimate((double)pointer[attIndex][attIndex], 
572                   (double)m_SumForCounts[classVal][att], m_NumAttValues[att]);
573       } else {
574           prob *= MEstimate((double)pointer[attIndex][attIndex], 
575                   (double)m_SumForCounts[classVal][att], m_NumAttValues[att]);
576       }
577    }
578    return prob;
579  }
580
581
582  /**
583   * Returns the probability estimate, using m-estimate
584   *
585   * @param frequency frequency of value of interest
586   * @param total count of all values
587   * @param numValues number of different values
588   * @return the probability estimate
589   */
590  public double MEstimate(double frequency, double total,
591                          double numValues) {
592   
593    return (frequency + m_MWeight / numValues) / (total + m_MWeight);
594  }
595   
596  /**
597   * Returns the probability estimate, using laplace correction
598   *
599   * @param frequency frequency of value of interest
600   * @param total count of all values
601   * @param numValues number of different values
602   * @return the probability estimate
603   */
604  public double LaplaceEstimate(double frequency, double total,
605                                double numValues) {
606   
607    return (frequency + 1.0) / (total + numValues);
608  }
609   
610   
611  /**
612   * Returns an enumeration describing the available options
613   *
614   * @return an enumeration of all the available options
615   */
616  public Enumeration listOptions() {
617
618    Vector newVector = new Vector(5);
619       
620    newVector.addElement(
621       new Option("\tOutput debugging information\n",
622                  "D", 0,"-D"));
623    newVector.addElement(
624       new Option("\tImpose a critcal value for specialization-generalization relationship\n"
625                  + "\t(default is 50)", "C", 1,"-C"));
626    newVector.addElement(
627       new Option("\tImpose a frequency limit for superParents\n"
628                  + "\t(default is 1)", "F", 2,"-F"));
629    newVector.addElement(
630       new Option("\tUsing Laplace estimation\n"
631                  + "\t(default is m-esimation (m=1))",
632                  "L", 3,"-L"));
633    newVector.addElement(
634       new Option("\tWeight value for m-estimation\n"
635                  + "\t(default is 1.0)", "M", 4,"-M"));
636
637    return newVector.elements();
638  }
639
640
641  /**
642   * Parses a given list of options. <p/>
643   *
644   <!-- options-start -->
645   * Valid options are: <p/>
646   *
647   * <pre> -D
648   *  Output debugging information
649   * </pre>
650   *
651   * <pre> -C
652   *  Impose a critcal value for specialization-generalization relationship
653   *  (default is 50)</pre>
654   *
655   * <pre> -F
656   *  Impose a frequency limit for superParents
657   *  (default is 1)</pre>
658   *
659   * <pre> -L
660   *  Using Laplace estimation
661   *  (default is m-esimation (m=1))</pre>
662   *
663   * <pre> -M
664   *  Weight value for m-estimation
665   *  (default is 1.0)</pre>
666   *
667   <!-- options-end -->
668   *
669   * @param options the list of options as an array of strings
670   * @throws Exception if an option is not supported
671   */
672  public void setOptions(String[] options) throws Exception {
673
674    m_Debug = Utils.getFlag('D', options);
675
676    String Critical = Utils.getOption('C', options);
677    if(Critical.length() != 0) 
678       m_Critical = Integer.parseInt(Critical);
679    else
680       m_Critical = 50;
681   
682    String Freq = Utils.getOption('F', options);
683    if(Freq.length() != 0) 
684       m_Limit = Integer.parseInt(Freq);
685    else
686       m_Limit = 1;
687   
688    m_Laplace = Utils.getFlag('L', options);
689    String MWeight = Utils.getOption('M', options); 
690    if(MWeight.length() != 0) {
691       if(m_Laplace)
692          throw new Exception("weight for m-estimate is pointless if using laplace estimation!");
693       m_MWeight = Double.parseDouble(MWeight);
694    } else
695       m_MWeight = 1.0;
696   
697    Utils.checkForRemainingOptions(options);
698  }
699   
700  /**
701   * Gets the current settings of the classifier.
702   *
703   * @return an array of strings suitable for passing to setOptions
704   */
705  public String [] getOptions() {
706       
707    Vector result  = new Vector();
708
709    if (m_Debug)
710       result.add("-D");
711
712    result.add("-F");
713    result.add("" + m_Limit);
714
715    if (m_Laplace) {
716       result.add("-L");
717    } else {
718       result.add("-M");
719       result.add("" + m_MWeight);
720    }
721       
722    result.add("-C");
723    result.add("" + m_Critical);
724
725    return (String[]) result.toArray(new String[result.size()]);
726  }
727 
728  /**
729   * Returns the tip text for this property
730   * @return tip text for this property suitable for
731   * displaying in the explorer/experimenter gui
732   */
733  public String mestWeightTipText() {
734    return "Set the weight for m-estimate.";
735  }
736
737  /**
738   * Sets the weight for m-estimate
739   *
740   * @param w the weight
741   */
742  public void setMestWeight(double w) {
743    if (getUseLaplace()) {
744       System.out.println(
745          "Weight is only used in conjunction with m-estimate - ignored!");
746    } else {
747      if(w > 0)
748         m_MWeight = w;
749      else
750         System.out.println("M-Estimate Weight must be greater than 0!");
751    }
752  }
753
754  /**
755   * Gets the weight used in m-estimate
756   *
757   * @return the weight for m-estimation
758   */
759  public double getMestWeight() {
760    return m_MWeight;
761  }
762
763  /**
764   * Returns the tip text for this property
765   * @return tip text for this property suitable for
766   * displaying in the explorer/experimenter gui
767   */
768  public String useLaplaceTipText() {
769    return "Use Laplace correction instead of m-estimation.";
770  }
771
772  /**
773   * Gets if laplace correction is being used.
774   *
775   * @return Value of m_Laplace.
776   */
777  public boolean getUseLaplace() {
778    return m_Laplace;
779  }
780
781  /**
782   * Sets if laplace correction is to be used.
783   *
784   * @param value Value to assign to m_Laplace.
785   */
786  public void setUseLaplace(boolean value) {
787    m_Laplace = value;
788  }
789
790  /**
791   * Returns the tip text for this property
792   * @return tip text for this property suitable for
793   * displaying in the explorer/experimenter gui
794   */
795  public String frequencyLimitTipText() {
796    return "Attributes with a frequency in the train set below "
797           + "this value aren't used as parents.";
798  }
799
800  /**
801   * Sets the frequency limit
802   *
803   * @param f the frequency limit
804   */
805  public void setFrequencyLimit(int f) {
806    m_Limit = f;
807  }
808
809  /**
810   * Gets the frequency limit.
811   *
812   * @return the frequency limit
813   */
814  public int getFrequencyLimit() {
815    return m_Limit;
816  }
817
818  /**
819   * Returns the tip text for this property
820   * @return tip text for this property suitable for
821   * displaying in the explorer/experimenter gui
822   */
823  public String criticalValueTipText() {
824    return "Specify critical value for specialization-generalization "
825           + "relationship (default 50).";
826  }
827
828  /**
829   * Sets the critical value
830   *
831   * @param c the critical value
832   */
833  public void setCriticalValue(int c) {
834    m_Critical = c;
835  }
836
837  /**
838   * Gets the critical value.
839   *
840   * @return the critical value
841   */
842  public int getCriticalValue() {
843    return m_Critical;
844  }
845
846  /**
847   * Returns a description of the classifier.
848   *
849   * @return a description of the classifier as a string.
850   */
851  public String toString() {
852 
853    StringBuffer text = new StringBuffer();
854       
855    text.append("The AODEsr Classifier");
856    if (m_Instances == null) {
857       text.append(": No model built yet.");
858    } else {
859       try {
860          for (int i = 0; i < m_NumClasses; i++) {
861             // print to string, the prior probabilities of class values
862             text.append("\nClass " + m_Instances.classAttribute().value(i) +
863                       ": Prior probability = " + Utils.
864                          doubleToString(((m_ClassCounts[i] + 1)
865                       /(m_SumInstances + m_NumClasses)), 4, 2)+"\n\n");
866          }
867               
868          text.append("Dataset: " + m_Instances.relationName() + "\n"
869                      + "Instances: " + m_NumInstances + "\n"
870                      + "Attributes: " + m_NumAttributes + "\n"
871                      + "Frequency limit for superParents: " + m_Limit + "\n"
872                      + "Critical value for the specializtion-generalization "
873                      + "relationship: " + m_Critical + "\n");
874          if(m_Laplace) {
875            text.append("Using LapLace estimation.");
876          } else {
877              text.append("Using m-estimation, m = " + m_MWeight); 
878          }
879       } catch (Exception ex) {
880          text.append(ex.getMessage());
881       }
882    }
883    return text.toString();
884  }
885 
886  /**
887   * Returns the revision string.
888   *
889   * @return            the revision
890   */
891  public String getRevision() {
892    return RevisionUtils.extract("$Revision: 5928 $");
893  }
894   
895  /**
896   * Main method for testing this class.
897   *
898   * @param argv the options
899   */
900  public static void main(String [] argv) {
901    runClassifier(new AODEsr(), argv);
902  }
903}
904
Note: See TracBrowser for help on using the repository browser.