source: branches/MetisMQI/src/main/java/weka/classifiers/rules/Ridor.java

Last change on this file was 29, checked in by gnappo, 14 years ago

Tagged the version for the demo and added a branch.

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Ridor.java
 *    Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.rules;

import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * An implementation of a RIpple-DOwn Rule learner.<br/>
 * <br/>
 * It generates a default rule first and then the exceptions for the default rule with the least (weighted) error rate.  Then it generates the "best" exceptions for each exception and iterates until pure.  Thus it performs a tree-like expansion of exceptions. The exceptions are a set of rules that predict classes other than the default. IREP is used to generate the exceptions.<br/>
 * <br/>
 * For more information about Ripple-Down Rules, see:<br/>
 * <br/>
 * Brian R. Gaines, Paul Compton (1995). Induction of Ripple-Down Rules Applied to Modeling Large Databases. J. Intell. Inf. Syst. 5(3):211-228.
 * <p/>
 <!-- globalinfo-end -->
 *
 * There are five inner classes defined in this class. <br>
 * The first is Ridor_node, which implements one node in the Ridor tree.  It is basically
 * composed of a default class and a set of exception rules to the default class.<br>
 * The second inner class is RidorRule, which implements a single exception rule
 * using REP.<br>
 * The last three inner classes are only used in RidorRule.  They are Antd, NumericAntd
 * and NominalAntd, which all implement a single antecedent in the RidorRule. <br>
 * The Antd class is an abstract class with two subclasses, NumericAntd and
 * NominalAntd, that implement the corresponding abstract functions.  These two subclasses
 * handle antecedents with a numeric attribute and a nominal attribute respectively.<p>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -F &lt;number of folds&gt;
 *  Set number of folds for IREP
 *  One fold is used as pruning set.
 *  (default 3)</pre>
 *
 * <pre> -S &lt;number of shuffles&gt;
 *  Set number of shuffles to randomize
 *  the data in order to get better rule.
 *  (default 1)</pre>
 *
 * <pre> -A
 *  Whether to use the error rate
 *  of all the data to select the default class
 *  in each step. If not set, the learner will only use the error rate in the pruning data</pre>
 *
 * <pre> -M
 *  Whether to use the majority class as
 *  the default class in each step instead of
 *  choosing the default class based on the error rate
 *  (if the flag is not set)</pre>
 *
 * <pre> -N &lt;min. weights&gt;
 *  Set the minimal weights of instances
 *  within a split.
 *  (default 2.0)</pre>
 *
 <!-- options-end -->
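 *
 * Example usage from Java code (a minimal sketch; the dataset path
 * "weather.arff" is a placeholder, and the data is assumed to have a nominal
 * class attribute in the last position):
 * <pre>
 * Instances data = new Instances(
 *     new java.io.BufferedReader(new java.io.FileReader("weather.arff")));
 * data.setClassIndex(data.numAttributes() - 1);  // class is the last attribute
 * Ridor ridor = new Ridor();
 * ridor.setOptions(new String[] {"-F", "3", "-S", "1", "-N", "2.0"});
 * ridor.buildClassifier(data);                   // learn the ripple-down rules
 * System.out.println(ridor);                     // print the rule tree
 * </pre>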
 *
 * @author Xin XU (xx5@cs.waikato.ac.nz)
 * @version $Revision: 5928 $
 */
public class Ridor 
  extends AbstractClassifier
  implements AdditionalMeasureProducer, WeightedInstancesHandler {

  /** for serialization */
  static final long serialVersionUID = -7261533075088314436L;
 
  /** The number of folds to split data into Grow and Prune for IREP */
  private int m_Folds = 3;
   
  /** The number of shuffles performed on the data for randomization */
  private int m_Shuffle = 1;

  /** Random object for randomization */
  private Random m_Random = null;
   
  /** The seed to perform randomization */
  private int m_Seed = 1;

  /** Whether to use the error rate on all the data */
  private boolean m_IsAllErr = false;

  /** Whether to use the majority class as the default class */
  private boolean m_IsMajority = false;
   
  /** The root of Ridor */
  private Ridor_node m_Root = null;
   
  /** The class attribute of the data */
  private Attribute m_Class;

  /** Statistics of the data */
  private double m_Cover, m_Err;

  /** The minimal number of instance weights within a split */
  private double m_MinNo = 2.0;
   
  /**
   * Returns a string describing the classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "An implementation of a RIpple-DOwn Rule learner.\n\n" 
      + "It generates a default rule first and then the exceptions for the default rule "
      + "with the least (weighted) error rate.  Then it generates the \"best\" exceptions for "
      + "each exception and iterates until pure.  Thus it performs a tree-like expansion of "
      + "exceptions. "
      + "The exceptions are a set of rules that predict classes other than the default. "
      + "IREP is used to generate the exceptions.\n\n"
      + "For more information about Ripple-Down Rules, see:\n\n";
  }
   
  /**
   * Private class implementing a single node of Ridor.
   * It consists of a default class label, a set of exceptions to the default rule
   * and the exceptions to each exception
   */
  private class Ridor_node 
    implements Serializable, RevisionHandler {
   
    /** for serialization */
    static final long serialVersionUID = -581370560157467677L;
       
    /** The default class label */
    private double defClass = Double.NaN;
       
    /** The set of exceptions of the default rule.
        Each element also has its own exceptions and the consequent of each rule
        is determined by its exceptions */
    private RidorRule[] rules = null;
       
    /** The exceptions of the exception rules */
    private Ridor_node[] excepts = null; 

    /** The level of this node */
    private int level;

    /**
     * Gets the default class label
     *
     * @return the default class label
     */
    public double getDefClass() { 
      return defClass; 
    }
   
    /**
     * Gets the set of exceptions
     *
     * @return the set of exceptions
     */
    public RidorRule[] getRules() { 
      return rules; 
    }
   
    /**
     * Gets the exceptions of the exception rules
     *
     * @return the exceptions of the exception rules
     */
    public Ridor_node[] getExcepts() { 
      return excepts; 
    }

    /**
     * Builds the ripple-down rules for this node in a recursive manner.
     *
     * @param dataByClass the data divided by their class label; the real class
     * labels of the instances are all set to 0
     * @param lvl the level of the parent node
     * @throws Exception if the ruleset of this node cannot be built
     */
    public void findRules(Instances[] dataByClass, int lvl) throws Exception {
      Vector finalRules = null;
      int clas = -1;
      double[] isPure = new double[dataByClass.length];
      int numMajority = 0;
           
      level = lvl + 1;
           
      for(int h=0; h < dataByClass.length; h++){
        isPure[h] = dataByClass[h].sumOfWeights();
        if(Utils.grOrEq(isPure[h], m_Folds))
          numMajority++;  // Count how many class labels have enough instances
      }
           
      if(numMajority <= 1){                        // The data is pure or not enough
        defClass = (double)Utils.maxIndex(isPure);
        return;
      }
      double total = Utils.sum(isPure); 
           
      if(m_IsMajority){
        defClass = (double)Utils.maxIndex(isPure);
        Instances data = new Instances(dataByClass[(int)defClass]);
        int index = data.classIndex();
               
        for(int j=0; j<data.numInstances(); j++)
          data.instance(j).setClassValue(1);       // Set one class as default
               
        for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
          if(k != (int)defClass){
            if(data.numInstances() >= dataByClass[k].numInstances())
              data = append(data, dataByClass[k]);
            else data = append(dataByClass[k], data);
          }
               
        data.setClassIndex(index);           // Position new class label
               
        double classCount = total - isPure[(int)defClass];
        finalRules = new Vector();
        buildRuleset(data, classCount, finalRules);
        if(finalRules.size() == 0)           // No good rules built
          return;
      }
      else{
        double maxAcRt = isPure[Utils.maxIndex(isPure)] / total;
               
        // Find default class
        for(int i=0; i < dataByClass.length; i++){
          if(isPure[i] >= m_Folds){
            Instances data = new Instances(dataByClass[i]);
            int index = data.classIndex();
                       
            for(int j=0; j<data.numInstances(); j++)
              data.instance(j).setClassValue(1);       // Set one class as default
                       
            for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
              if(k != i){
                if(data.numInstances() >= dataByClass[k].numInstances())
                  data = append(data, dataByClass[k]);
                else data = append(dataByClass[k], data);
              }
                       
            data.setClassIndex(index);           // Position new class label
                       
            /* Build a set of rules */
            double classCount = data.sumOfWeights() - isPure[i];
            Vector ruleset = new Vector();
            double wAcRt = buildRuleset(data, classCount, ruleset); 
                       
            if(Utils.gr(wAcRt, maxAcRt)){
              finalRules = ruleset;
              maxAcRt = wAcRt;
              clas = i;
            }
          }
        }
               
        if(finalRules == null){ // No good rules found, set majority class as default
          defClass = (double)Utils.maxIndex(isPure);
          return;
        }
               
        defClass = (double)clas;
      }
                       
      /* Store the exception rules and default class in this node */
      int size = finalRules.size();
      rules = new RidorRule[size];
      excepts = new Ridor_node[size];
      for(int l=0; l < size; l++)
        rules[l] = (RidorRule)finalRules.elementAt(l);
           
      /* Build exceptions for each exception rule */
      Instances[] uncovered = dataByClass; 
      if(level == 1)  // The error of the default rule
        m_Err = total - uncovered[(int)defClass].sumOfWeights();                       

      uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0);   
           
      for(int m=0; m < size; m++){
        /* The data covered by this rule; it is also removed from the original data */
        Instances[][] dvdData = divide(rules[m], uncovered);
        Instances[] covered = dvdData[0];    // Data covered by the rule
        //uncovered = dvdData[1];            // Data not covered by the rule
        excepts[m] = new Ridor_node();
        excepts[m].findRules(covered, level);// Find exceptions on the covered data
      }
    }

    /**
     * Private function to build a rule set and return the weighted average of
     * the accuracy rates of the rules in the set.
     *
     * @param insts the data used to build the ruleset
     * @param classCount the count of the instances with the predicted class but not
     *                   yet covered by the ruleset
     * @param ruleset the ruleset to be built
     * @return the weighted accuracy rate of the ruleset
     * @throws Exception if the rules cannot be built properly
     */
    private double buildRuleset(Instances insts, double classCount, Vector ruleset) 
      throws Exception {           
      Instances data = new Instances(insts);
      double wAcRt = 0;  // The weighted accuracy rate of this ruleset
      double total = data.sumOfWeights();
           
      while( classCount >= m_Folds ){      // Data is not pure
        RidorRule bestRule = null;
        double bestWorthRate= -1;        // The best worth achieved by
        double bestWorth = -1;           // randomization of the data
               
        RidorRule rule = new RidorRule();                               
        rule.setPredictedClass(0);       // Predict the classes other than default
               
        for(int j = 0; j < m_Shuffle; j++){
          if(m_Shuffle > 1)
            data.randomize(m_Random);
                   
          rule.buildClassifier(data);
                   
          double wr, w; // Worth rate and worth
          if(m_IsAllErr){
            wr = (rule.getWorth()+rule.getAccuG()) / 
              (rule.getCoverP()+rule.getCoverG());
            w = rule.getWorth() + rule.getAccuG();
          }
          else{
            wr = rule.getWorthRate();
            w = rule.getWorth(); 
          }
                   
          if(Utils.gr(wr, bestWorthRate) ||
             (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
            bestRule = rule;
            bestWorthRate = wr;
            bestWorth = w;
          }
        }
               
        if (bestRule == null)
          throw new Exception("Something went wrong inside buildRuleset()!");
               
        if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
          break;                       // No more good rules generated
               
        Instances newData = new Instances(data); 
        data = new Instances(newData, 0);// Empty the data
        classCount = 0;
        double cover = 0;                // Coverage of this rule on whole data
               
        for(int l=0; l<newData.numInstances(); l++){
          Instance datum = newData.instance(l);
          if(!bestRule.isCover(datum)){// Data not covered by the previous rule
            data.add(datum);
            if(Utils.eq(datum.classValue(), 0)) 
              classCount += datum.weight(); // The predicted class in the data
          }
          else cover += datum.weight();
        }                       
               
        wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
        ruleset.addElement(bestRule);                   
      } 
           
      /* The weighted def. accuracy */
      double wDefAcRt = (data.sumOfWeights()-classCount) / total;                   
      wAcRt += wDefAcRt;
           
      return wAcRt;
    }
       
    /**
     * Private function to combine two datasets
     *
     * @param data1 the data to which data2 is appended
     * @param data2 the data to be appended to data1
     * @return the merged data
     */
    private Instances append(Instances data1, Instances data2){
      Instances data = new Instances(data1);
      for(int i=0; i<data2.numInstances(); i++)
        data.add(data2.instance(i));
           
      return data;
    }
       
    /**
     * Compute the weighted average of the accuracy rate of a certain rule.
     * Each rule is weighted by its coverage proportion in the whole data. 
     * So the accuracy rate of one ruleset is actually
     *
     * (worth rate) * (coverage proportion)
     *
     *                               coverage of the rule on the whole data
     * where coverage proportion = -----------------------------------------
     *                              the whole data size fed into the ruleset
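     *
     * For example, a rule with worth rate 0.8 that covers 30 of the 120
     * instances fed into the ruleset contributes 0.8 * (30/120) = 0.2 to the
     * weighted accuracy of the ruleset (illustrative numbers only).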
     *
     * @param worthRt the worth rate
     * @param cover the coverage of the rule on the whole data
     * @param total the total data size fed into the ruleset
     * @return the weighted accuracy rate of this rule
     */
    private double computeWeightedAcRt(double worthRt, double cover, double total){
         
      return (worthRt * (cover/total)); 
    }
       
    /**
     * Splits an array of data, organized by true class label, into the parts
     * covered and not covered by the given rule.
     * Each bag of data is filtered through the specified rule, and
     * both the data covered and the data not covered by the rule are returned
     * by the procedure. 
     *
     * @param rule the rule covering the data
     * @param dataByClass the array of data to be covered by the rule
     * @return the arrays of data both covered and not covered by the rule
     */
    private Instances[][] divide(RidorRule rule, Instances[] dataByClass){
      int len = dataByClass.length;
      Instances[][] dataBags = new Instances[2][len];
           
      for(int i=0; i < len; i++){
        Instances[] dvdData = rule.coveredByRule(dataByClass[i]);
        dataBags[0][i] = dvdData[0];     // Covered by the rule
        dataBags[1][i] = dvdData[1];     // Not covered by the rule
      }
           
      return dataBags;
    }

    /**
     * The size of a certain node of Ridor, i.e. the
     * number of rules generated within and below this node
     *
     * @return the size of this node
     */
    public int size(){
      int size = 0;
      if(rules != null){
        for(int i=0; i < rules.length; i++)
          size += excepts[i].size(); // The children's size
        size += rules.length;          // This node's size
      }
      return size;
    }
       
    /**
     * Prints all the rules of one node of Ridor.
     *
     * @return a textual description of one node of Ridor
     */
    public String toString(){
      StringBuffer text =  new StringBuffer();
           
      if(level == 1)
        text.append(m_Class.name() + " = " + m_Class.value((int)getDefClass())+
                    "  ("+m_Cover+"/"+m_Err+")\n");
      if(rules != null){
        for(int i=0; i < rules.length; i++){
          for(int j=0; j < level; j++)
            text.append("         ");
          String cl = m_Class.value((int)(excepts[i].getDefClass()));
          text.append("  Except " + 
                      rules[i].toString(m_Class.name(), cl)+
                      "\n" + excepts[i].toString());
        }
      }
           
      return text.toString();
    }
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }   

  /**
   * This class implements a single rule that predicts the 2-class distribution. 
   *
   * A rule consists of antecedents "AND"ed together and the consequent (class value)
   * for the classification.  In this case, the consequent is the distribution of
   * the available classes (always 2 classes) in the dataset. 
   * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to select
   * an antecedent and Reduced Error Pruning (REP) is used to prune the rule.
   *
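   * As a worked example of this metric (with illustrative numbers only): if
   * an antecedent covers t = 8 instances of which p = 6 are accurately
   * predicted, and the default accuracy is P/T = 40/100, then the gain is
   * 6 * (log2(6/8) - log2(40/100)) = 6 * (-0.415 + 1.322), roughly 5.44 bits.
   *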
   */
  private class RidorRule 
    implements WeightedInstancesHandler, Serializable, RevisionHandler {
       
    /** for serialization */
    static final long serialVersionUID = 4375199423973848157L;
   
    /** The internal representation of the class label to be predicted */
    private double m_Class = -1;       
       
    /** The class attribute of the data */
    private Attribute m_ClassAttribute;
       
    /** The vector of antecedents of this rule */
    protected FastVector m_Antds = null;
       
    /** The worth rate of this rule, in this case, accuracy rate in the pruning data */
    private double m_WorthRate = 0;
       
    /** The worth value of this rule, in this case, accurate # in pruning data */
    private double m_Worth = 0;
       
    /** The sum of weights of the data covered by this rule in the pruning data */
    private double m_CoverP = 0;   
       
    /** The accurate and covered data of this rule in the growing data */
    private double m_CoverG = 0, m_AccuG = 0;           
 
    /** The access functions for parameters */
    public void setPredictedClass(double cl){  m_Class = cl; }
    public double getPredictedClass(){ return m_Class; }
       
    /**
     * Builds a single rule learner with REP dealing with 2 classes.
     * This rule learner always tries to predict the class with label
     * m_Class.
     *
     * @param instances the training data
     * @throws Exception if classifier can't be built successfully
     */
    public void buildClassifier(Instances instances) throws Exception {
      m_ClassAttribute = instances.classAttribute();
      if (!m_ClassAttribute.isNominal()) 
        throw new UnsupportedClassTypeException(" Only nominal class, please.");
      if(instances.numClasses() != 2)
        throw new Exception(" Only 2 classes, please.");
           
      Instances data = new Instances(instances);
      if(Utils.eq(data.sumOfWeights(),0))
        throw new Exception(" No training data.");
           
      data.deleteWithMissingClass();
      if(Utils.eq(data.sumOfWeights(),0))
        throw new Exception(" The class labels of all the training data are missing."); 
           
      if(data.numInstances() < m_Folds)
        throw new Exception(" Not enough data for REP.");
           
      m_Antds = new FastVector();       
           
      /* Split data into Grow and Prune */
      m_Random = new Random(m_Seed);
      data.randomize(m_Random);
      data.stratify(m_Folds);
      Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
      Instances pruneData=data.testCV(m_Folds, m_Folds-1);
           
      grow(growData);      // Build this rule
           
      prune(pruneData);    // Prune this rule
    }
       
    /**
     * Finds all the instances in the dataset covered by this rule.
     * The instances not covered are likewise separated from the original data
     * and returned by this procedure.
     *
     * @param insts the dataset to be covered by this rule.
     * @return the instances covered and not covered by this rule
     */
    public Instances[] coveredByRule(Instances insts){
      Instances[] data = new Instances[2];
      data[0] = new Instances(insts, insts.numInstances());
      data[1] = new Instances(insts, insts.numInstances());
           
      for(int i=0; i<insts.numInstances(); i++){
        Instance datum = insts.instance(i);
        if(isCover(datum))
          data[0].add(datum);        // Covered by this rule
        else
          data[1].add(datum);        // Not covered by this rule
      }
           
      return data;
    }
       
    /**
     * Whether the instance is covered by this rule
     *
     * @param datum the instance in question
     * @return the boolean value indicating whether the instance is covered by this rule
     */
    public boolean isCover(Instance datum){
      boolean isCover=true;
           
      for(int i=0; i<m_Antds.size(); i++){
        Antd antd = (Antd)m_Antds.elementAt(i);
        if(!antd.isCover(datum)){
          isCover = false;
          break;
        }
      }
           
      return isCover;
    }       
       
    /**
     * Whether this rule has antecedents, i.e. whether it is more than a default rule
     *
     * @return the boolean value indicating whether the rule has antecedents
     */
    public boolean hasAntds(){
      if (m_Antds == null)
        return false;
      else
        return (m_Antds.size() > 0);
    }     
       
    /**
     * Build one rule using the growing data
     *
     * @param data the growing data used to build the rule
     */   
    private void grow(Instances data){
      Instances growData = new Instances(data);
           
      m_AccuG = computeDefAccu(growData);
      m_CoverG = growData.sumOfWeights();
      /* Compute the default accuracy rate of the growing data */
      double defAcRt= m_AccuG / m_CoverG; 
           
      /* Keep the record of which attributes have already been used */   
      boolean[] used=new boolean [growData.numAttributes()];
      for (int k=0; k<used.length; k++)
        used[k]=false;
      int numUnused=used.length;
           
      double maxInfoGain;
      boolean isContinue = true; // The stopping criterion of this rule
           
      while (isContinue){   
        maxInfoGain = 0;       // We require that infoGain be positive
               
        /* Build a list of antecedents */
        Antd oneAntd=null;
        Instances coverData = null;
        Enumeration enumAttr=growData.enumerateAttributes();       
        int index=-1; 
               
        /* Build one condition based on all attributes not used yet */
        while (enumAttr.hasMoreElements()){
          Attribute att= (Attribute)(enumAttr.nextElement());
          index++;
                   
          Antd antd =null;     
          if(att.isNumeric())
            antd = new NumericAntd(att);
          else
            antd = new NominalAntd(att);
                   
          if(!used[index]){
            /* Compute the best information gain for each attribute,
               it's stored in the antecedent formed by this attribute.
               This procedure returns the data covered by the antecedent */
            Instances coveredData = computeInfoGain(growData, defAcRt, antd);
            if(coveredData != null){
              double infoGain = antd.getMaxInfoGain();                 
              if(Utils.gr(infoGain, maxInfoGain)){
                oneAntd=antd;
                coverData = coveredData; 
                maxInfoGain = infoGain;
              }             
            }
          }
        }
               
        if(oneAntd == null)      return;
               
        //Numeric attributes can be used more than once
        if(!oneAntd.getAttr().isNumeric()){ 
          used[oneAntd.getAttr().index()]=true;
          numUnused--;
        }
               
        m_Antds.addElement((Object)oneAntd);
        growData = coverData;// Grow data size is shrinking
               
        defAcRt = oneAntd.getAccuRate();
               
        /* Stop if no more data, rule perfect, no more attributes */
        if(Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0))
          isContinue = false;
      }
    }
       
    /**
     * Compute the best information gain for the specified antecedent
     * 
     * @param instances the data based on which the infoGain is computed
     * @param defAcRt the default accuracy rate of the data
     * @param antd the specific antecedent
     * @return the data covered by the antecedent
     */
    private Instances computeInfoGain(Instances instances, double defAcRt, Antd antd){
      Instances data = new Instances(instances);
           
      /* Split the data into bags.
         The information gain of each bag is also calculated in this procedure */
      Instances[] splitData = antd.splitData(data, defAcRt, m_Class); 
           
      /* Get the bag of data to be used for next antecedents */
      if(splitData != null)
        return splitData[(int)antd.getAttrValue()];
      else return null;
    }
       
    /**
     * Prune the rule using the pruning data and update the worth parameters for this rule.
     * The accuracy rate is used to prune the rule.
     *
     * @param pruneData the pruning data used to prune the rule
     */   
    private void prune(Instances pruneData){
      Instances data=new Instances(pruneData);
           
      double total = data.sumOfWeights();
           
      /* The default accurate# and the accuracy rate on the pruning data */
      double defAccu=0, defAccuRate=0;
           
      int size=m_Antds.size();
      if(size == 0) return; // Default rule before pruning
           
      double[] worthRt = new double[size];
      double[] coverage = new double[size];
      double[] worthValue = new double[size];
      for(int w=0; w<size; w++){
        worthRt[w]=coverage[w]=worthValue[w]=0.0;
      }
           
      /* Calculate accuracy parameters for all the antecedents in this rule */
      for(int x=0; x<size; x++){
        Antd antd=(Antd)m_Antds.elementAt(x);
        Attribute attr= antd.getAttr();
        Instances newData = new Instances(data);
        data = new Instances(newData, newData.numInstances()); // Make data empty
               
        for(int y=0; y<newData.numInstances(); y++){
          Instance ins=newData.instance(y);
          if(!ins.isMissing(attr)){              // Attribute not missing
            if(antd.isCover(ins)){             // Covered by this antecedent
              coverage[x] += ins.weight();
              data.add(ins);                 // Add to data for further pruning
              if(Utils.eq(ins.classValue(), m_Class)) // Accurate prediction
                worthValue[x] += ins.weight();
            }
          }
        }
               
        if(coverage[x] != 0) 
          worthRt[x] = worthValue[x]/coverage[x];
      }
           
      /* Prune the antecedents according to the accuracy parameters */
      for(int z=(size-1); z > 0; z--)
        if(Utils.sm(worthRt[z], worthRt[z-1]))
          m_Antds.removeElementAt(z);
        else  break;
           
      /* Check whether this rule is a default rule */
      if(m_Antds.size() == 1){
        defAccu = computeDefAccu(pruneData);
        defAccuRate = defAccu/total;                // Compute def. accuracy
        if(Utils.sm(worthRt[0], defAccuRate)){      // Becomes a default rule
          m_Antds.removeAllElements();
        }
      }   
           
      /* Update the worth parameters of this rule */
      int antdsSize = m_Antds.size();
      if(antdsSize != 0){                          // Not a default rule
        m_Worth = worthValue[antdsSize-1];       // WorthValues of the last antecedent
        m_WorthRate = worthRt[antdsSize-1];
        m_CoverP = coverage[antdsSize-1];
        Antd last = (Antd)m_Antds.lastElement();
        m_CoverG = last.getCover();
        m_AccuG = last.getAccu();
      }
      else{                                        // Default rule   
        m_Worth = defAccu;                       // Default WorthValues
        m_WorthRate = defAccuRate;
        m_CoverP = total;
      }
    }
       
    /**
     * Private function to compute the default number of accurate instances
     * in the specified data for m_Class
     *
     * @param data the data in question
     * @return the default accuracy number
     */
    private double computeDefAccu(Instances data){ 
      double defAccu=0;
      for(int i=0; i<data.numInstances(); i++){
        Instance inst = data.instance(i);
        if(Utils.eq(inst.classValue(), m_Class))
          defAccu += inst.weight();
      }
      return defAccu;
    }
       
    /** The following are get functions after prune() has set the value of worthRate and worth */
    public double getWorthRate(){ return m_WorthRate; }
    public double getWorth(){ return m_Worth; }
    public double getCoverP(){ return m_CoverP; }
    public double getCoverG(){ return m_CoverG; }
    public double getAccuG(){ return m_AccuG; }

    /**
     * Prints this rule with the specified class label
     *
     * @param att the string standing for the attribute in the consequent of this rule
     * @param cl the string standing for the value in the consequent of this rule
     * @return a textual description of this rule with the specified class label
     */
    public String toString(String att, String cl) {
      StringBuffer text =  new StringBuffer();
      if(m_Antds.size() > 0){
        for(int j=0; j< (m_Antds.size()-1); j++)
          text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
        text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
      }
      text.append(" => " + att + " = " + cl);
      text.append("  ("+m_CoverG+"/"+(m_CoverG - m_AccuG)+") ["+
                  m_CoverP+"/"+(m_CoverP - m_Worth)+"]");
      return text.toString();
    }
       
    /**
     * Prints this rule
     *
     * @return a textual description of this rule
     */
    public String toString() {
      return toString(m_ClassAttribute.name(), m_ClassAttribute.value((int)m_Class));
    }       
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
   
  /**
   * The single antecedent in the rule, which is composed of an attribute and
   * the corresponding value.  There are two inherited classes, namely NumericAntd
   * and NominalAntd, in which the attributes are numeric and nominal respectively.
   */
  private abstract class Antd 
    implements Serializable, RevisionHandler {

    /** for serialization */
    private static final long serialVersionUID = 5317379013858933369L;
   
    /** The attribute of the antecedent */
    protected Attribute att;
       
    /** The attribute value of the antecedent. 
       For a numeric attribute, value is either 0 (1st bag) or 1 (2nd bag) */
    protected double value; 
       
    /** The maximum infoGain achieved by this antecedent test */
    protected double maxInfoGain;
       
    /** The accuracy rate of this antecedent test on the growing data */
    protected double accuRate;
       
    /** The coverage of this antecedent */
    protected double cover;
       
    /** The accurately covered data of this antecedent */
    protected double accu;
       
    /** Constructor */
    public Antd(Attribute a){
      att=a;
      value=Double.NaN; 
      maxInfoGain = 0;
      accuRate = Double.NaN;
      cover = Double.NaN;
      accu = Double.NaN;
    }
       
    /* The abstract members for inheritance */
    public abstract Instances[] splitData(Instances data, double defAcRt, double cla);
    public abstract boolean isCover(Instance inst);
    public abstract String toString();
       
    /* Get functions of this antecedent */
    public Attribute getAttr(){ return att; }
    public double getAttrValue(){ return value; }
    public double getMaxInfoGain(){ return maxInfoGain; }
    public double getAccuRate(){ return accuRate; } 
    public double getAccu(){ return accu; } 
    public double getCover(){ return cover; } 
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
  /**
   * The antecedent with a numeric attribute
   */
  private class NumericAntd 
    extends Antd {
   
    /** for serialization */
    static final long serialVersionUID = 1968761518014492214L;
       
    /** The split point for this numeric antecedent */
    private double splitPoint;
       
    /** Constructor */
    public NumericAntd(Attribute a){ 
      super(a);
      splitPoint = Double.NaN;
    }   
       
    /** Get split point of this numeric antecedent */
    public double getSplitPoint(){ return splitPoint; }
       
    /**
     * Implements the splitData function. 
     * This procedure splits the data into two bags according
     * to the information gain of the numeric attribute value.
     * The maximum infoGain is also calculated. 
     *
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for the data
     * @param cl the class label to be predicted
     * @return the array of data after the split
     */
    public Instances[] splitData(Instances insts, double defAcRt, double cl){
      Instances data = new Instances(insts);
      data.sort(att);
      int total=data.numInstances();// Total number of instances without
      // missing value for att
           
      int split=1;                  // Current split position
      int prev=0;                   // Previous split position
      int finalSplit=split;         // Final split position
      maxInfoGain = 0;
      value = 0;       

      // Compute minimum number of Instances required in each split
      double minSplit =  0.1 * (data.sumOfWeights()) / 2.0;
      if (Utils.smOrEq(minSplit,m_MinNo)) 
        minSplit = m_MinNo;
      else if (Utils.gr(minSplit,25)) 
        minSplit = 25;     
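      // e.g. with the default m_MinNo of 2.0, a total weight of 100 gives
      // 0.1*100/2 = 5.0, so minSplit always ends up in the range [m_MinNo, 25]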
           
      double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
           
      for(int x=0; x<data.numInstances(); x++){
        Instance inst = data.instance(x);
        if(inst.isMissing(att)){
          total = x;
          break;
        }
               
        sndCover += inst.weight();
        if(Utils.eq(inst.classValue(), cl))
          sndAccu += inst.weight();
      }
           
      // Enough Instances with known values?
      if (Utils.sm(sndCover,(2*minSplit)))
        return null;
           
      if(total == 0) return null; // Data all missing for the attribute         
      splitPoint = data.instance(total-1).value(att);   
           
      for(; split < total; split++){
        if(!Utils.eq(data.instance(split).value(att), 
                     data.instance(prev).value(att))){ // Can't split within same value
                   
          for(int y=prev; y<split; y++){
            Instance inst = data.instance(y);
            fstCover += inst.weight(); sndCover -= inst.weight(); 
            if(Utils.eq(data.instance(y).classValue(), cl)){
              fstAccu += inst.weight();  // First bag positive# ++
              sndAccu -= inst.weight();  // Second bag positive# --
            }                     
          }
                   
          if(Utils.sm(fstCover, minSplit) || Utils.sm(sndCover, minSplit)){
            prev=split;  // Cannot split because either
            continue;    // split has not enough data
          }
                   
          double fstAccuRate = 0, sndAccuRate = 0;
          if(!Utils.eq(fstCover,0))
            fstAccuRate = fstAccu/fstCover;             
          if(!Utils.eq(sndCover,0))
            sndAccuRate = sndAccu/sndCover;
                   
          /* Which bag has higher information gain? */
          boolean isFirst; 
          double fstInfoGain, sndInfoGain;
          double accRate, infoGain, coverage, accurate;
                   
          fstInfoGain = Utils.eq(fstAccuRate, 0) ? 
            0 : (fstAccu*(Utils.log2(fstAccuRate) - Utils.log2(defAcRt)));
          sndInfoGain = Utils.eq(sndAccuRate, 0) ? 
            0 : (sndAccu*(Utils.log2(sndAccuRate) - Utils.log2(defAcRt)));
          if(Utils.gr(fstInfoGain,sndInfoGain) || 
             (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.grOrEq(fstAccuRate,sndAccuRate)))){
            isFirst = true;
            infoGain = fstInfoGain;
            accRate = fstAccuRate;
            accurate = fstAccu;
            coverage = fstCover;
          }
          else{
            isFirst = false;
            infoGain = sndInfoGain;
            accRate = sndAccuRate;
            accurate = sndAccu;
            coverage = sndCover;
          }
                   
          boolean isUpdate = Utils.gr(infoGain, maxInfoGain);
                   
          /* Check whether so far the max infoGain */
          if(isUpdate){
            splitPoint = (data.instance(split).value(att) + 
                          data.instance(prev).value(att))/2;
            value = ((isFirst) ? 0 : 1);
            accuRate = accRate;
            accu = accurate;
            cover = coverage;
            maxInfoGain = infoGain;
            finalSplit = split;
          }
          prev=split;
        }
      }
           
      /* Split the data */
      Instances[] splitData = new Instances[2];
      splitData[0] = new Instances(data, 0, finalSplit);
      splitData[1] = new Instances(data, finalSplit, total-finalSplit);
           
      return splitData;
    }
       
    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
        if(Utils.eq(value, 0)){
          if(Utils.smOrEq(inst.value(att), splitPoint))
            isCover=true;
        }
        else if(Utils.gr(inst.value(att), splitPoint))
          isCover=true;
      }
      return isCover;
    }
       
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol = Utils.eq(value, 0.0) ? " <= " : " > ";
      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
    }   
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
   
  /**
   * The antecedent with a nominal attribute
   */
  private class NominalAntd 
    extends Antd {
   
    /** for serialization */
    static final long serialVersionUID = -256386137196078004L;
       
    /* The parameters of infoGain calculated for each attribute value */
    private double[] accurate;
    private double[] coverage;
    private double[] infoGain;
       
    /** Constructor */
    public NominalAntd(Attribute a){ 
      super(a);
      int bag = att.numValues();
      accurate = new double[bag];
      coverage = new double[bag];
      infoGain = new double[bag];
    }   
       
    /**
     * Implements the splitData function. 
     * This procedure splits the data into bags according
     * to the nominal attribute value.
     * The infoGain for each bag is also calculated. 
     *
     * @param data the data to be split
     * @param defAcRt the default accuracy rate for the data
     * @param cl the class label to be predicted
     * @return the array of data after the split
     */
    public Instances[] splitData(Instances data, double defAcRt, double cl){
      int bag = att.numValues();
      Instances[] splitData = new Instances[bag];
           
      for(int x=0; x<bag; x++){
        accurate[x] = coverage[x] = infoGain[x] = 0;
        splitData[x] = new Instances(data, data.numInstances());
      }
           
      for(int x=0; x<data.numInstances(); x++){
        Instance inst=data.instance(x);
        if(!inst.isMissing(att)){
          int v = (int)inst.value(att);
          splitData[v].add(inst);
          coverage[v] += inst.weight();
          if(Utils.eq(inst.classValue(), cl))
            accurate[v] += inst.weight();
        }
      }
           
      // Check if >=2 splits have more than the minimal data
      int count=0; 
      for(int x=0; x<bag; x++){
        double t = coverage[x];
        if(Utils.grOrEq(t, m_MinNo)){
          double p = accurate[x];               
                   
          if(!Utils.eq(t, 0.0))
            infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt)));
          ++count;
        }
      }
               
      if(count < 2) // Don't split
        return null;
           
      value = (double)Utils.maxIndex(infoGain);
           
      cover = coverage[(int)value];
      accu = accurate[(int)value];
           
      if(!Utils.eq(cover,0))
        accuRate = accu / cover;
      else accuRate = 0;
           
      maxInfoGain = infoGain [(int)value];
           
      return splitData;
    }
       
    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
        if(Utils.eq(inst.value(att), value))
          isCover=true;     
      }
      return isCover;
    }
       
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      return (att.name() + " = " +att.value((int)value));
    } 
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
   
    return result;
  }

  /**
   * Builds the ripple-down rule learner.
   *
   * @param instances the training data
   * @throws Exception if classifier can't be built successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    Instances data = new Instances(instances);
    data.deleteWithMissingClass();
   
    int numCl = data.numClasses();
    m_Root = new Ridor_node();
    m_Class = instances.classAttribute();     // The original class label
       
    int index = data.classIndex();
    m_Cover = data.sumOfWeights();
   
    m_Random = new Random(m_Seed);
       
    /* Create a binary attribute */
    FastVector binary_values = new FastVector(2);
    binary_values.addElement("otherClasses");
    binary_values.addElement("defClass");
    Attribute attr = new Attribute ("newClass", binary_values);
    data.insertAttributeAt(attr, index);       
    data.setClassIndex(index);                 // The new class label

    /* Partition the data into bags according to their original class values */
    Instances[] dataByClass = new Instances[numCl];
    for(int i=0; i < numCl; i++)
      dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
    for(int i=0; i < data.numInstances(); i++){ // Partitioning
      Instance inst = data.instance(i);
      inst.setClassValue(0);           // Set new class value to be 0
      dataByClass[(int)inst.value(index+1)].add(inst); 
    }   
       
    for(int i=0; i < numCl; i++)   
      dataByClass[i].deleteAttributeAt(index+1);   // Delete original class
       
    m_Root.findRules(dataByClass, 0);
   
  }
   
  /**
   * Classify the test instance with the rule learner
   *
   * @param datum the instance to be classified
   * @return the classification
   */
  public double classifyInstance(Instance datum){
    return classify(m_Root, datum);
  }
   
  /**
   * Classify the test instance with one node of Ridor.
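   * The node's default class is returned unless the instance is covered by
   * one of the node's exception rules, in which case classification recurses
   * into the subtree attached to the first covering rule.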
   *
   * @param node the node of Ridor to classify the test instance
   * @param datum the instance to be classified
   * @return the classification
   */
  private double classify(Ridor_node node, Instance datum){
    double classValue = node.getDefClass();
    RidorRule[] rules = node.getRules();

    if(rules != null){
      Ridor_node[] excepts = node.getExcepts(); 
      for(int i=0; i < excepts.length; i++){
        if(rules[i].isCover(datum)){
          classValue = classify(excepts[i], datum);
          break;
        }
      }
    }
       
    return classValue;
  }

  /**
   * Returns an enumeration describing the available options
   * Valid options are: <p>
   *
   * -F number <br>
   * Set number of folds for reduced error pruning. One fold is
   * used as the pruning set. (Default: 3) <p>
   *
   * -S number <br>
   * Set number of shuffles for randomization. (Default: 1) <p>
   *
   * -A <br>
   * Whether to use the error rate of all the data to select
   * the default class in each step. If not set, the learner will only use
   * the error rate in the pruning data <p>
   *
   * -M <br>
   * Whether to use the majority class as the default class
   * in each step instead of choosing the default class based on the error rate
   * (if the flag is not set) <p> 
   *
   * -N number <br>
   * Set the minimal weights of instances within a split.
   * (Default: 2) <p>
   *   
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(5);
       
    newVector.addElement(new Option("\tSet number of folds for IREP\n" +
                                    "\tOne fold is used as pruning set.\n" +
                                    "\t(default 3)","F", 1, "-F <number of folds>"));
    newVector.addElement(new Option("\tSet number of shuffles to randomize\n" +
                                    "\tthe data in order to get better rule.\n" +
                                    "\t(default 1)","S", 1, "-S <number of shuffles>"));
    newVector.addElement(new Option("\tWhether to use the error rate\n"+
                                    "\tof all the data to select the default class\n"+
                                    "\tin each step. If not set, the learner will only use\n"+
                                    "\tthe error rate in the pruning data","A", 0, "-A"));
    newVector.addElement(new Option("\tWhether to use the majority class as\n"+
                                    "\tthe default class in each step instead of\n"+
                                    "\tchoosing the default class based on the error rate\n"+
                                    "\t(if the flag is not set)","M", 0, "-M"));
    newVector.addElement(new Option("\tSet the minimal weights of instances\n" +
                                    "\twithin a split.\n" +
                                    "\t(default 2.0)","N", 1, "-N <min. weights>"));           
    return newVector.elements();
  }
   
  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -F &lt;number of folds&gt;
   *  Set number of folds for IREP
   *  One fold is used as pruning set.
   *  (default 3)</pre>
   *
   * <pre> -S &lt;number of shuffles&gt;
   *  Set number of shuffles to randomize
   *  the data in order to get better rule.
   *  (default 1)</pre>
   *
   * <pre> -A
   *  Whether to use the error rate
   *  of all the data to select the default class
   *  in each step. If not set, the learner will only use the error rate in the pruning data</pre>
   *
   * <pre> -M
   *  Whether to use the majority class as
   *  the default class in each step instead of
   *  choosing the default class based on the error rate
   *  (if the flag is not set)</pre>
   *
   * <pre> -N &lt;min. weights&gt;
   *  Set the minimal weights of instances
   *  within a split.
   *  (default 2.0)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
       
    String numFoldsString = Utils.getOption('F', options);
    if (numFoldsString.length() != 0) 
      m_Folds = Integer.parseInt(numFoldsString);
    else 
      m_Folds = 3;
       
    String numShuffleString = Utils.getOption('S', options);
    if (numShuffleString.length() != 0) 
      m_Shuffle = Integer.parseInt(numShuffleString);
    else 
      m_Shuffle = 1;

    String seedString = Utils.getOption('s', options);
    if (seedString.length() != 0) 
      m_Seed = Integer.parseInt(seedString);
    else 
      m_Seed = 1;
       
    String minNoString = Utils.getOption('N', options);
    if (minNoString.length() != 0) 
      m_MinNo = Double.parseDouble(minNoString);
    else 
      m_MinNo = 2.0;
       
    m_IsAllErr = Utils.getFlag('A', options);
    m_IsMajority = Utils.getFlag('M', options);
  }
   
  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
       
    String [] options = new String [8];
    int current = 0;
    options[current++] = "-F"; options[current++] = "" + m_Folds;
    options[current++] = "-S"; options[current++] = "" + m_Shuffle;
    options[current++] = "-N"; options[current++] = "" + m_MinNo;
       
    if(m_IsAllErr)
      options[current++] = "-A";
    if(m_IsMajority)
      options[current++] = "-M";       
    while (current < options.length) 
      options[current++] = "";
    return options;
  }
   
  /** Set and get members for parameters */

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String foldsTipText() {
    return "Determines the amount of data used for pruning. One fold is used for "
      + "pruning, the rest for growing the rules.";
  }

  public void setFolds(int fold){ m_Folds = fold; }
  public int getFolds(){ return m_Folds; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String shuffleTipText() {
    return "Determines how often the data is shuffled before a rule "
      + "is chosen. If > 1, a rule is learned multiple times and the "
      + "most accurate rule is chosen.";
  }

  public void setShuffle(int sh){ m_Shuffle = sh; }
  public int getShuffle(){ return m_Shuffle; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The seed used for randomizing the data.";
  }

  public void setSeed(int s){ m_Seed = s; }
  public int getSeed(){ return m_Seed; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String wholeDataErrTipText() {
    return "Whether worth of rule is computed based on all the data "
      + "or just based on data covered by rule.";
  }

  public void setWholeDataErr(boolean a){ m_IsAllErr = a; }
  public boolean getWholeDataErr(){ return m_IsAllErr; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String majorityClassTipText() {
    return "Whether the majority class is used as default.";
  }
  public void setMajorityClass(boolean m){ m_IsMajority = m; }
  public boolean getMajorityClass(){ return m_IsMajority; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNoTipText() {
    return "The minimum total weight of the instances in a rule.";
  }

  public void setMinNo(double m){  m_MinNo = m; }
  public double getMinNo(){ return m_MinNo; }
  /**
   * Returns an enumeration of the additional measure names
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector(1);
    newVector.addElement("measureNumRules");
    return newVector.elements();
  }
   
  /**
   * Returns the value of the named measure
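   * (currently the only supported measure is "measureNumRules"; e.g.
   * <code>getMeasure("measureNumRules")</code> returns the size of the rule
   * tree plus one for the default rule)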
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) 
      return numRules();
    else 
      throw new IllegalArgumentException(additionalMeasureName+" not supported (Ripple down rule learner)");
  } 
   
  /**
   * Measures the total number of rules in the model
   *
   * @return the number of rules
   */ 
  private double numRules(){
    int size = 0;
    if(m_Root != null)
      size = m_Root.size();
       
    return (double)(size+1); // Add the default rule
  }
   
  /**
   * Prints all the rules of the rule learner.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Root == null) 
      return "RIpple DOwn Rule Learner(Ridor): No model built yet.";
       
    return ("RIpple DOwn Rule Learner(Ridor) rules\n"+
            "--------------------------------------\n\n" + 
            m_Root.toString() +
            "\nTotal number of rules (incl. the default rule): " + (int)numRules());
  }
 
  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5928 $");
  }
   
  /**
   * Main method.
   *
   * @param args the options for the classifier
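   *
   * Example command line (a sketch; "weather.arff" is a placeholder for a
   * training file with a nominal class attribute):
   * <pre>
   * java weka.classifiers.rules.Ridor -t weather.arff -F 3 -S 1 -N 2.0
   * </pre>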
   */
  public static void main(String[] args) {     
    runClassifier(new Ridor(), args);
  }
}