source: src/main/java/weka/classifiers/rules/Prism.java @ 20

Last change on this file since 20 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 15.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Prism.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.rules;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.core.Attribute;
28import weka.core.Capabilities;
29import weka.core.Instance;
30import weka.core.Instances;
31import weka.core.RevisionHandler;
32import weka.core.RevisionUtils;
33import weka.core.TechnicalInformation;
34import weka.core.TechnicalInformationHandler;
35import weka.core.Capabilities.Capability;
36import weka.core.TechnicalInformation.Field;
37import weka.core.TechnicalInformation.Type;
38import weka.core.Utils;
39
40import java.io.Serializable;
41import java.util.Enumeration;
42
43/**
44 <!-- globalinfo-start -->
45 * Class for building and using a PRISM rule set for classification. Can only deal with nominal attributes. Can't deal with missing values. Doesn't do any pruning.<br/>
46 * <br/>
47 * For more information, see <br/>
48 * <br/>
49 * J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules. International Journal of Man-Machine Studies. 27(4):349-370.
50 * <p/>
51 <!-- globalinfo-end -->
52 *
53 <!-- technical-bibtex-start -->
54 * BibTeX:
55 * <pre>
56 * &#64;article{Cendrowska1987,
57 *    author = {J. Cendrowska},
58 *    journal = {International Journal of Man-Machine Studies},
59 *    number = {4},
60 *    pages = {349-370},
61 *    title = {PRISM: An algorithm for inducing modular rules},
62 *    volume = {27},
63 *    year = {1987}
64 * }
65 * </pre>
66 * <p/>
67 <!-- technical-bibtex-end -->
68 *
69 <!-- options-start -->
70 * Valid options are: <p/>
71 *
72 * <pre> -D
73 *  If set, classifier is run in debug mode and
74 *  may output additional info to the console</pre>
75 *
76 <!-- options-end -->
77 *
78 * @author Ian H. Witten (ihw@cs.waikato.ac.nz)
79 * @version $Revision: 5987 $
80*/
81public class Prism 
82  extends AbstractClassifier
83  implements TechnicalInformationHandler {
84
85  /** for serialization */
86  static final long serialVersionUID = 1310258880025902106L;
87 
88  /**
89   * Returns a string describing classifier
90   * @return a description suitable for
91   * displaying in the explorer/experimenter gui
92   */
93  public String globalInfo() {
94    return "Class for building and using a PRISM rule set for classification. "
95      + "Can only deal with nominal attributes. Can't deal with missing values. "
96      + "Doesn't do any pruning.\n\n"
97      + "For more information, see \n\n"
98      + getTechnicalInformation().toString();
99  }
100
101  /**
102   * Returns an instance of a TechnicalInformation object, containing
103   * detailed information about the technical background of this class,
104   * e.g., paper reference or book this class is based on.
105   *
106   * @return the technical information about this class
107   */
108  public TechnicalInformation getTechnicalInformation() {
109    TechnicalInformation        result;
110   
111    result = new TechnicalInformation(Type.ARTICLE);
112    result.setValue(Field.AUTHOR, "J. Cendrowska");
113    result.setValue(Field.YEAR, "1987");
114    result.setValue(Field.TITLE, "PRISM: An algorithm for inducing modular rules");
115    result.setValue(Field.JOURNAL, "International Journal of Man-Machine Studies");
116    result.setValue(Field.VOLUME, "27");
117    result.setValue(Field.NUMBER, "4");
118    result.setValue(Field.PAGES, "349-370");
119   
120    return result;
121  }
122
123  /**
124   * Class for storing a PRISM ruleset, i.e. a list of rules
125   */
126  private class PrismRule 
127    implements Serializable, RevisionHandler {
128   
129    /** for serialization */
130    static final long serialVersionUID = 4248784350656508583L;
131   
132    /** The classification */
133    private int m_classification;
134
135    /** The instance */
136    private Instances m_instances;
137
138    /** First test of this rule */
139    private Test m_test; 
140
141    /** Number of errors made by this rule (will end up 0) */
142    private int m_errors; 
143
144    /** The next rule in the list */
145    private PrismRule m_next;
146
147    /**
148     * Constructor that takes instances and the classification.
149     *
150     * @param data the instances
151     * @param cl the class
152     * @exception Exception if something goes wrong
153     */
154    public PrismRule(Instances data, int cl) throws Exception {
155
156      m_instances = data;
157      m_classification = cl;
158      m_test = null;
159      m_next = null;
160      m_errors = 0;
161      Enumeration enu = data.enumerateInstances();
162      while (enu.hasMoreElements()) {
163        if ((int) ((Instance) enu.nextElement()).classValue() != cl) {
164          m_errors++;
165        }
166      }
167      m_instances = new Instances(m_instances, 0);
168    } 
169
170    /**
171     * Returns the result assigned by this rule to a given instance.
172     *
173     * @param inst the instance to be classified
174     * @return the classification
175     */
176    public int resultRule(Instance inst) {
177
178      if (m_test == null || m_test.satisfies(inst)) {
179        return m_classification;
180      } else {
181        return -1;
182      }
183    }
184
185    /**
186     * Returns the result assigned by these rules to a given instance.
187     *
188     * @param inst the instance to be classified
189     * @return the classification
190     */
191    public int resultRules(Instance inst) {
192
193      if (resultRule(inst) != -1) {
194        return m_classification;
195      } else if (m_next != null) {
196        return m_next.resultRules(inst);
197      } else {
198        return -1;
199      }
200    }
201
202    /**
203     * Returns the set of instances that are covered by this rule.
204     *
205     * @param data the instances to be checked
206     * @return the instances covered
207     */
208    public Instances coveredBy(Instances data) {
209
210      Instances r = new Instances(data, data.numInstances());
211      Enumeration enu = data.enumerateInstances();
212      while (enu.hasMoreElements()) {
213        Instance i = (Instance) enu.nextElement();
214        if (resultRule(i) != -1) {
215          r.add(i);
216        }
217      }
218      r.compactify();
219      return r;
220    }
221
222    /**
223     * Returns the set of instances that are not covered by this rule.
224     *
225     * @param data the instances to be checked
226     * @return the instances not covered
227     */
228    public Instances notCoveredBy(Instances data) {
229
230      Instances r = new Instances(data, data.numInstances());
231      Enumeration enu = data.enumerateInstances();
232      while (enu.hasMoreElements()) {
233        Instance i = (Instance) enu.nextElement();
234        if (resultRule(i) == -1) {
235          r.add(i);
236        }
237      }
238      r.compactify();
239      return r;
240    }
241
242    /**
243     * Prints the set of rules.
244     *
245     * @return a description of the rules as a string
246     */
247    public String toString() {
248
249      try {
250        StringBuffer text = new StringBuffer();
251        if (m_test != null) {
252          text.append("If ");
253          for (Test t = m_test; t != null; t = t.m_next) {
254            if (t.m_attr == -1) {
255              text.append("?");
256            } else {
257              text.append(m_instances.attribute(t.m_attr).name() + " = " +
258                          m_instances.attribute(t.m_attr).value(t.m_val));
259            }
260            if (t.m_next != null) {
261              text.append("\n   and ");
262            }
263          }
264          text.append(" then ");
265        }
266        text.append(m_instances.classAttribute().value(m_classification) + "\n");
267        if (m_next != null) {
268          text.append(m_next.toString());
269        }
270        return text.toString();
271      } catch (Exception e) {
272        return "Can't print Prism classifier!";
273      }
274    }
275   
276    /**
277     * Returns the revision string.
278     *
279     * @return          the revision
280     */
281    public String getRevision() {
282      return RevisionUtils.extract("$Revision: 5987 $");
283    }
284  }
285 
286  /**
287   * Class for storing a list of attribute-value tests
288   */
289  private class Test 
290    implements Serializable, RevisionHandler {
291   
292    /** for serialization */
293    static final long serialVersionUID = -8925333011350280799L;
294
295    /** Attribute to test */
296    private int m_attr = -1; 
297
298    /** The attribute's value */
299    private int m_val; 
300
301    /** The next test in the rule */
302    private Test m_next = null; 
303
304    /**
305     * Returns whether a given instance satisfies this test.
306     *
307     * @param inst the instance to be tested
308     * @return true if the instance satisfies the test
309     */
310    private boolean satisfies(Instance inst) {
311
312      if ((int) inst.value(m_attr) == m_val) {
313        if (m_next == null) {
314          return true;
315        } else {
316          return m_next.satisfies(inst);
317        }
318      }
319      return false;   
320    }
321   
322    /**
323     * Returns the revision string.
324     *
325     * @return          the revision
326     */
327    public String getRevision() {
328      return RevisionUtils.extract("$Revision: 5987 $");
329    }
330  }
331
332  /** The first rule in the list of rules */
333  private PrismRule m_rules;
334
335  /**
336   * Classifies a given instance.
337   *
338   * @param inst the instance to be classified
339   * @return the classification
340   */
341  public double classifyInstance(Instance inst) {
342
343    int result = m_rules.resultRules(inst);
344    if (result == -1) {
345      return Utils.missingValue();
346    } else {
347      return (double)result;
348    }
349  }
350
351  /**
352   * Returns default capabilities of the classifier.
353   *
354   * @return      the capabilities of this classifier
355   */
356  public Capabilities getCapabilities() {
357    Capabilities result = super.getCapabilities();
358    result.disableAll();
359
360    // attributes
361    result.enable(Capability.NOMINAL_ATTRIBUTES);
362
363    // class
364    result.enable(Capability.NOMINAL_CLASS);
365    result.enable(Capability.MISSING_CLASS_VALUES);
366   
367    return result;
368  }
369
370  /**
371   * Generates the classifier.
372   *
373   * @param data the data to be used
374   * @exception Exception if the classifier can't built successfully
375   */
376  public void buildClassifier(Instances data) throws Exception {
377
378    int cl; // possible value of theClass
379    Instances E, ruleE;
380    PrismRule rule = null;
381    Test test = null, oldTest = null;
382    int bestCorrect, bestCovers, attUsed;
383    Enumeration enumAtt;
384
385    // can classifier handle the data?
386    getCapabilities().testWithFail(data);
387
388    // remove instances with missing class
389    data = new Instances(data);
390    data.deleteWithMissingClass();
391   
392    for (cl = 0; cl < data.numClasses(); cl++) { // for each class cl
393      E = data; // initialize E to the instance set
394      while (contains(E, cl)) { // while E contains examples in class cl
395        rule = addRule(rule, new PrismRule(E, cl)); // make a new rule
396        ruleE = E; // examples covered by this rule
397        while (rule.m_errors != 0) { // until the rule is perfect
398          test = new Test(); // make a new test
399          bestCorrect = bestCovers = attUsed = 0;
400
401          // for every attribute not mentioned in the rule
402          enumAtt = ruleE.enumerateAttributes();
403          while (enumAtt.hasMoreElements()) {
404            Attribute attr = (Attribute) enumAtt.nextElement();
405            if (isMentionedIn(attr, rule.m_test)) {
406              attUsed++; 
407              continue;
408            }
409            int M = attr.numValues();
410            int[] covers = new int [M];
411            int[] correct = new int [M];
412            for (int j = 0; j < M; j++) {
413              covers[j] = correct[j] = 0;
414            }
415
416            // ... calculate the counts for this class
417            Enumeration enu = ruleE.enumerateInstances();
418            while (enu.hasMoreElements()) {
419              Instance i = (Instance) enu.nextElement();
420              covers[(int) i.value(attr)]++;
421              if ((int) i.classValue() == cl) {
422                correct[(int) i.value(attr)]++;
423              }
424            }
425
426            // ... for each value of this attribute, see if this test is better
427            for (int val = 0; val < M; val ++) {
428              int diff = correct[val] * bestCovers - bestCorrect * covers[val];
429
430              // this is a ratio test, correct/covers vs best correct/covers
431              if (test.m_attr == -1
432                  || diff > 0 || (diff == 0 && correct[val] > bestCorrect)) {
433
434                // update the rule to use this test
435                bestCorrect = correct[val];
436                bestCovers = covers[val];
437                test.m_attr = attr.index();
438                test.m_val = val;
439                rule.m_errors = bestCovers - bestCorrect;
440              }
441            }
442          }
443          if (test.m_attr == -1) { // Couldn't find any sensible test
444            break;
445          }
446          oldTest = addTest(rule, oldTest, test);
447          ruleE = rule.coveredBy(ruleE);
448          if (attUsed == (data.numAttributes() - 1)) { // Used all attributes.
449            break;
450          }
451        }
452        E = rule.notCoveredBy(E);
453      }
454    }
455  }
456
457  /**
458   * Add a rule to the ruleset.
459   *
460   * @param lastRule the last rule in the rule set
461   * @param newRule the rule to be added
462   * @return the new last rule in the rule set
463   */
464  private PrismRule addRule(PrismRule lastRule, PrismRule newRule) {
465
466    if (lastRule == null) {
467      m_rules = newRule;
468    } else {
469      lastRule.m_next = newRule;
470    }
471    return newRule;
472  }
473
474  /**
475   * Add a test to this rule.
476   *
477   * @param rule the rule to which test is to be added
478   * @param lastTest the rule's last test
479   * @param newTest the test to be added
480   * @return the new last test of the rule
481   */
482  private Test addTest(PrismRule rule, Test lastTest, Test newTest) {
483
484    if (rule.m_test == null) {
485      rule.m_test = newTest;
486    } else {
487      lastTest.m_next = newTest;
488    }
489    return newTest;
490  }
491
492  /**
493   * Does E contain any examples in the class C?
494   *
495   * @param E the instances to be checked
496   * @param C the class
497   * @return true if there are any instances of class C
498   * @throws Exception if something goes wrong
499   */
500  private static boolean contains(Instances E, int C) throws Exception {
501
502    Enumeration enu = E.enumerateInstances();
503    while (enu.hasMoreElements()) {
504      if ((int) ((Instance) enu.nextElement()).classValue() == C) {
505        return true;
506      }
507    }
508    return false;
509  }
510
511  /**
512   * Is this attribute mentioned in the rule?
513   *
514   * @param attr the attribute to be checked for
515   * @param t test contained by rule
516   * @return true if the attribute is mentioned in the rule
517   */
518  private static boolean isMentionedIn(Attribute attr, Test t) {
519
520    if (t == null) { 
521      return false;
522    }
523    if (t.m_attr == attr.index()) {
524      return true;
525    }
526    return isMentionedIn(attr, t.m_next);
527  }   
528
529  /**
530   * Prints a description of the classifier.
531   *
532   * @return a description of the classifier as a string
533   */
534  public String toString() {
535
536    if (m_rules == null) {
537      return "Prism: No model built yet.";
538    }
539    return "Prism rules\n----------\n" + m_rules.toString();
540  }
541 
542  /**
543   * Returns the revision string.
544   *
545   * @return            the revision
546   */
547  public String getRevision() {
548    return RevisionUtils.extract("$Revision: 5987 $");
549  }
550
551  /**
552   * Main method for testing this class
553   *
554   * @param args the commandline parameters
555   */
556  public static void main(String[] args) {
557    runClassifier(new Prism(), args);
558  }
559}
Note: See TracBrowser for help on using the repository browser.