source: src/main/java/weka/associations/Tertius.java @ 19

Last change on this file since 19 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 53.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Tertius.java
19 *    Copyright (C) 2003 Peter A. Flach, Nicolas Lachiche
20 *
21 *    Thanks to Amelie Deltour for porting the original C code to Java
22 *    and integrating it into Weka.
23 */
24
25package weka.associations;
26
27import weka.associations.tertius.AttributeValueLiteral;
28import weka.associations.tertius.IndividualInstances;
29import weka.associations.tertius.IndividualLiteral;
30import weka.associations.tertius.Literal;
31import weka.associations.tertius.Predicate;
32import weka.associations.tertius.Rule;
33import weka.associations.tertius.SimpleLinkedList;
34import weka.core.Attribute;
35import weka.core.Capabilities;
36import weka.core.Instances;
37import weka.core.Option;
38import weka.core.OptionHandler;
39import weka.core.RevisionUtils;
40import weka.core.SelectedTag;
41import weka.core.Tag;
42import weka.core.TechnicalInformation;
43import weka.core.TechnicalInformationHandler;
44import weka.core.Utils;
45import weka.core.Capabilities.Capability;
46import weka.core.TechnicalInformation.Field;
47import weka.core.TechnicalInformation.Type;
48
49import java.awt.BorderLayout;
50import java.awt.Button;
51import java.awt.Font;
52import java.awt.Frame;
53import java.awt.Label;
54import java.awt.TextField;
55import java.awt.event.ActionEvent;
56import java.awt.event.ActionListener;
57import java.io.BufferedReader;
58import java.io.File;
59import java.io.FileReader;
60import java.io.Reader;
61import java.util.ArrayList;
62import java.util.Date;
63import java.util.Enumeration;
64import java.util.Vector;
65
66/**
67 <!-- globalinfo-start -->
68 * Finds rules according to confirmation measure (Tertius-type algorithm).<br/>
69 * <br/>
70 * For more information see:<br/>
71 * <br/>
72 * P. A. Flach, N. Lachiche (1999). Confirmation-Guided Discovery of first-order rules with Tertius. Machine Learning. 42:61-95.
73 * <p/>
74 <!-- globalinfo-end -->
75 *
76 <!-- technical-bibtex-start -->
77 * BibTeX:
78 * <pre>
79 * &#64;article{Flach1999,
80 *    author = {P. A. Flach and N. Lachiche},
81 *    journal = {Machine Learning},
82 *    pages = {61-95},
83 *    title = {Confirmation-Guided Discovery of first-order rules with Tertius},
84 *    volume = {42},
85 *    year = {1999}
86 * }
87 * </pre>
88 * <p/>
89 <!-- technical-bibtex-end -->
90 *
91 <!-- options-start -->
92 * Valid options are: <p/>
93 *
94 * <pre> -K &lt;number of values in result&gt;
95 *  Set maximum number of confirmation  values in the result. (default: 10)</pre>
96 *
97 * <pre> -F &lt;frequency threshold&gt;
98 *  Set frequency threshold for pruning. (default: 0)</pre>
99 *
100 * <pre> -C &lt;confirmation threshold&gt;
101 *  Set confirmation threshold. (default: 0)</pre>
102 *
103 * <pre> -N &lt;noise threshold&gt;
104 *  Set noise threshold : maximum frequency of counter-examples.
105 *  0 gives only satisfied rules. (default: 1)</pre>
106 *
107 * <pre> -R
108 *  Allow attributes to be repeated in a same rule.</pre>
109 *
110 * <pre> -L &lt;number of literals&gt;
111 *  Set maximum number of literals in a rule. (default: 4)</pre>
112 *
113 * <pre> -G &lt;0=no negation | 1=body | 2=head | 3=body and head&gt;
114 *  Set the negations in the rule. (default: 0)</pre>
115 *
116 * <pre> -S
117 *  Consider only classification rules.</pre>
118 *
119 * <pre> -c &lt;class index&gt;
120 *  Set index of class attribute. (default: last).</pre>
121 *
122 * <pre> -H
123 *  Consider only horn clauses.</pre>
124 *
125 * <pre> -E
126 *  Keep equivalent rules.</pre>
127 *
128 * <pre> -M
129 *  Keep same clauses.</pre>
130 *
131 * <pre> -T
132 *  Keep subsumed rules.</pre>
133 *
134 * <pre> -I &lt;0=always match | 1=never match | 2=significant&gt;
135 *  Set the way to handle missing values. (default: 0)</pre>
136 *
137 * <pre> -O
138 *  Use ROC analysis. </pre>
139 *
140 * <pre> -p &lt;name of file&gt;
141 *  Set the file containing the parts of the individual for individual-based learning.</pre>
142 *
143 * <pre> -P &lt;0=no output | 1=on stdout | 2=in separate window&gt;
144 *  Set output of current values. (default: 0)</pre>
145 *
146 <!-- options-end -->
147 *
148 * @author <a href="mailto:adeltour@netcourrier.com">Amelie Deltour</a>
149 * @version $Revision: 5444 $
150 */
151
152public class Tertius 
153  extends AbstractAssociator
154  implements OptionHandler, Runnable, TechnicalInformationHandler {
155
156  /** for serialization */
157  static final long serialVersionUID = 5556726848380738179L;
158 
159  /** The results. */
160  private SimpleLinkedList m_results;
161
162  /** Number of hypotheses considered. */
163  private int m_hypotheses;
164
165  /** Number of hypotheses explored. */
166  private int m_explored;
167
168  /** Time needed for the search. */
169  private Date m_time;
170
171  /** Field to output the current values. */ 
172  private TextField m_valuesText;
173
174  /** Instances used for the search. */
175  private Instances m_instances;
176
177  /** Predicates used in the rules. */
178  private ArrayList m_predicates;
179
180  /** Status of the search. */
181  private int m_status;
182  /** Status of the search: normal */
183  private static final int NORMAL = 0;
184  /** Status of the search: memory problem */
185  private static final int MEMORY = 1;
186  /** Status of the search: user interruption */
187  private static final int STOP = 2;
188 
189  /* Pruning options. */
190
191  /** Number of best confirmation values to search. */
192  private int m_best;
193
194  /** Frequency threshold for the body and the negation of the head. */
195  private double m_frequencyThreshold;
196
197  /** Confirmation threshold for the rules. */
198  private double m_confirmationThreshold;
199
200  /** Maximal number of counter-instances. */
201  private double m_noiseThreshold;
202
203  /* Search space & language bias options. */
204
205  /** Repeat attributes ? */
206  private boolean m_repeat;
207
208  /** Number of literals in a rule. */
209  private int m_numLiterals;
210
211  /** Type of negation: none */
212  private static final int NONE = 0;
213  /** Type of negation: body */
214  private static final int BODY = 1;
215  /** Type of negation: head */
216  private static final int HEAD = 2;
217  /** Type of negation: all */
218  private static final int ALL = 3;
219  /** Types of negation. */
220  private static final Tag [] TAGS_NEGATION = {
221    new Tag(NONE, "None"),
222    new Tag(BODY, "Body"),
223    new Tag(HEAD, "Head"),
224    new Tag(ALL, "Both")
225      };
226
227  /** Type of negation used in the rules. */
228  private int m_negation;
229
230  /** Classification bias. */
231  private boolean m_classification;
232
233  /** Index of class attribute. */
234  private int m_classIndex;
235
236  /** Horn clauses bias. */
237  private boolean m_horn;
238
239  /* Subsumption tests options. */
240
241  /** Perform test on equivalent rules ? */
242  private boolean m_equivalent;
243
244  /** Perform test on same clauses ? */
245  private boolean m_sameClause;
246 
247  /** Perform subsumption test ? */
248  private boolean m_subsumption;
249
250  /** Way of handling missing values: min counterinstances */
251  public static final int EXPLICIT = 0;
252  /** Way of handling missing values: max counterinstances */
253  public static final int IMPLICIT = 1;
254  /** Way of handling missing values: missing as a particular value */
255  public static final int SIGNIFICANT = 2;
256  /** Ways of handling missing values.  */
257  private static final Tag [] TAGS_MISSING = {
258    new Tag(EXPLICIT, "Matches all"),
259    new Tag(IMPLICIT, "Matches none"),
260    new Tag(SIGNIFICANT, "Significant")
261      };
262
263  /** Way of handling missing values in the search. */
264  private int m_missing;
265
266  /** Perform ROC analysis ? */
267  private boolean m_roc;
268
269  /** Name of the file containing the parts for individual-based learning. */
270  private String m_partsString;
271 
272  /** Part instances for individual-based learning. */
273  private Instances m_parts;
274
275  /** Type of values output: No */ 
276  private static final int NO = 0;
277  /** Type of values output: stdout */ 
278  private static final int OUT = 1;
279  /** Type of values output: Window */ 
280  private static final int WINDOW = 2;
281  /** Types of values output. */ 
282  private static final Tag [] TAGS_VALUES = {
283    new Tag(NO, "No"),
284    new Tag(OUT, "stdout"),
285    new Tag(WINDOW, "Window")
286      };
287
288  /** Type of values output. */
289  private int m_printValues;
290
291  /**
292   * Constructor that sets the options to the default values.
293   */
294  public Tertius() {
295
296    resetOptions();
297  }
298
299  /**
300   * Returns a string describing this associator.
301   *
302   * @return A description of the evaluator suitable for
303   * displaying in the explorer/experimenter gui.
304   */
305  public String globalInfo() {
306    return 
307        "Finds rules according to confirmation measure (Tertius-type "
308      + "algorithm).\n\n"
309      + "For more information see:\n\n"
310      + getTechnicalInformation().toString();
311  }
312
313  /**
314   * Returns an instance of a TechnicalInformation object, containing
315   * detailed information about the technical background of this class,
316   * e.g., paper reference or book this class is based on.
317   *
318   * @return the technical information about this class
319   */
320  public TechnicalInformation getTechnicalInformation() {
321    TechnicalInformation        result;
322   
323    result = new TechnicalInformation(Type.ARTICLE);
324    result.setValue(Field.AUTHOR, "P. A. Flach and N. Lachiche");
325    result.setValue(Field.YEAR, "1999");
326    result.setValue(Field.TITLE, "Confirmation-Guided Discovery of first-order rules with Tertius");
327    result.setValue(Field.JOURNAL, "Machine Learning");
328    result.setValue(Field.VOLUME, "42");
329    result.setValue(Field.PAGES, "61-95");
330   
331    return result;
332  }
333
334  /**
335   * Resets the options to the default values.
336   */
337  public void resetOptions() {
338
339    /* Pruning options. */
340    m_best = 10;
341    m_frequencyThreshold = 0;
342    m_confirmationThreshold = 0;
343    m_noiseThreshold = 1;
344
345    /* Search space & language bias options. */
346    m_repeat = false;
347    m_numLiterals = 4;
348    m_negation = NONE;
349    m_classification = false;
350    m_classIndex = 0;
351    m_horn = false;
352
353    /* Subsumption tests options. */
354    m_equivalent = true;
355    m_sameClause = true;
356    m_subsumption = true;
357
358    /* Missing values. */
359    m_missing = EXPLICIT;
360
361    /* ROC analysis. */
362    m_roc = false;
363
364    /* Individual-based learning. */
365    m_partsString = "";
366    m_parts = null;
367
368    /* Values output. */
369    m_printValues = NO;
370  }
371
372  /**
373   * Returns an enumeration describing the available options.
374   *
375   * @return An enumeration of all the available options.
376   */
377  public Enumeration listOptions() {
378   
379    Vector newVector = new Vector(17);
380
381    /* Pruning options. */
382    newVector.addElement(new Option("\tSet maximum number of confirmation  "
383                                    + "values in the result. (default: 10)",
384                                    "K", 1, "-K <number of values in result>"));
385    newVector.addElement(new Option("\tSet frequency threshold for pruning. "
386                                    + "(default: 0)",
387                                    "F", 1, "-F <frequency threshold>"));
388    newVector.addElement(new Option("\tSet confirmation threshold. "
389                                    + "(default: 0)",
390                                    "C", 1, "-C <confirmation threshold>"));
391    newVector.addElement(new Option("\tSet noise threshold : maximum frequency "
392                                    + "of counter-examples.\n\t0 gives only "
393                                    + "satisfied rules. (default: 1)",
394                                    "N", 1, "-N <noise threshold>"));
395
396    /* Search space & language bias options. */
397    newVector.addElement(new Option("\tAllow attributes to be repeated in a "
398                                    + "same rule.",
399                                    "R", 0, "-R"));
400    newVector.addElement(new Option("\tSet maximum number of literals in a "
401                                    + "rule. (default: 4)",
402                                    "L", 1, "-L <number of literals>"));
403    newVector.addElement(new Option("\tSet the negations in the rule. "
404                                    + "(default: 0)",
405                                    "G", 1, "-G <0=no negation | "
406                                    + "1=body | "
407                                    + "2=head | "
408                                    + "3=body and head>"));
409    newVector.addElement(new Option("\tConsider only classification rules.",
410                                    "S", 0, "-S"));
411    newVector.addElement(new Option("\tSet index of class attribute. "
412                                    + "(default: last).",
413                                    "c", 1, "-c <class index>"));
414    newVector.addElement(new Option("\tConsider only horn clauses.",
415                                    "H", 0, "-H"));
416
417    /* Subsumption tests options. */
418    newVector.addElement(new Option("\tKeep equivalent rules.",
419                                    "E", 0, "-E"));
420    newVector.addElement(new Option("\tKeep same clauses.",
421                                    "M", 0, "-M"));
422    newVector.addElement(new Option("\tKeep subsumed rules.",
423                                    "T", 0, "-T"));
424
425    /* Missing values options. */
426    newVector.addElement(new Option("\tSet the way to handle missing values. " 
427                                    + "(default: 0)",
428                                    "I", 1, "-I <0=always match | "
429                                    + "1=never match | "
430                                    + "2=significant>"));
431
432    /* ROC analysis. */
433    newVector.addElement(new Option("\tUse ROC analysis. ",
434                                    "O", 0, "-O"));
435
436    /* Individual-based learning. */
437    newVector.addElement(new Option("\tSet the file containing the parts of "
438                                    + "the individual for individual-based "
439                                    + "learning.",
440                                    "p", 1, "-p <name of file>"));
441
442    /* Values output. */
443    newVector.addElement(new Option("\tSet output of current values. "
444                                    + "(default: 0)",
445                                    "P", 1, "-P <0=no output | "
446                                    + "1=on stdout | "
447                                    + "2=in separate window>"));
448   
449    return newVector.elements();
450  }
451 
452  /**
453   * Parses a given list of options. <p/>
454   *
455   <!-- options-start -->
456   * Valid options are: <p/>
457   *
458   * <pre> -K &lt;number of values in result&gt;
459   *  Set maximum number of confirmation  values in the result. (default: 10)</pre>
460   *
461   * <pre> -F &lt;frequency threshold&gt;
462   *  Set frequency threshold for pruning. (default: 0)</pre>
463   *
464   * <pre> -C &lt;confirmation threshold&gt;
465   *  Set confirmation threshold. (default: 0)</pre>
466   *
467   * <pre> -N &lt;noise threshold&gt;
468   *  Set noise threshold : maximum frequency of counter-examples.
469   *  0 gives only satisfied rules. (default: 1)</pre>
470   *
471   * <pre> -R
472   *  Allow attributes to be repeated in a same rule.</pre>
473   *
474   * <pre> -L &lt;number of literals&gt;
475   *  Set maximum number of literals in a rule. (default: 4)</pre>
476   *
477   * <pre> -G &lt;0=no negation | 1=body | 2=head | 3=body and head&gt;
478   *  Set the negations in the rule. (default: 0)</pre>
479   *
480   * <pre> -S
481   *  Consider only classification rules.</pre>
482   *
483   * <pre> -c &lt;class index&gt;
484   *  Set index of class attribute. (default: last).</pre>
485   *
486   * <pre> -H
487   *  Consider only horn clauses.</pre>
488   *
489   * <pre> -E
490   *  Keep equivalent rules.</pre>
491   *
492   * <pre> -M
493   *  Keep same clauses.</pre>
494   *
495   * <pre> -T
496   *  Keep subsumed rules.</pre>
497   *
498   * <pre> -I &lt;0=always match | 1=never match | 2=significant&gt;
499   *  Set the way to handle missing values. (default: 0)</pre>
500   *
501   * <pre> -O
502   *  Use ROC analysis. </pre>
503   *
504   * <pre> -p &lt;name of file&gt;
505   *  Set the file containing the parts of the individual for individual-based learning.</pre>
506   *
507   * <pre> -P &lt;0=no output | 1=on stdout | 2=in separate window&gt;
508   *  Set output of current values. (default: 0)</pre>
509   *
510   <!-- options-end -->
511   *
512   * @param options The list of options as an array of strings.
513   * @throws Exception if an option is not supported.
514   */
515  public void setOptions(String [] options) throws Exception {
516   
517    resetOptions();
518   
519    /* Pruning options. */
520    String bestString = Utils.getOption('K', options);
521    if (bestString.length() != 0) {
522      try {
523        m_best = Integer.parseInt(bestString);
524      } catch (Exception e) {
525        throw new Exception("Invalid value for -K option: "
526                            + e.getMessage() + ".");
527      }
528      if (m_best < 1) {
529        throw new Exception("Number of confirmation values has to be "
530                            + "greater than one!");
531      }
532    }
533    String frequencyThresholdString = Utils.getOption('F', options);
534    if (frequencyThresholdString.length() != 0) {
535      try {     
536        m_frequencyThreshold
537          = (new Double(frequencyThresholdString)).doubleValue();
538      } catch (Exception e) {
539        throw new Exception("Invalid value for -F option: "
540                            + e.getMessage() + ".");
541      }
542      if (m_frequencyThreshold < 0 || m_frequencyThreshold > 1) {
543        throw new Exception("Frequency threshold has to be between "
544                            + "zero and one!");
545      }
546    }
547    String confirmationThresholdString = Utils.getOption('C', options);
548    if (confirmationThresholdString.length() != 0) {
549      try {
550        m_confirmationThreshold
551          = (new Double(confirmationThresholdString)).doubleValue();
552      } catch (Exception e) {
553        throw new Exception("Invalid value for -C option: "
554                            + e.getMessage() + ".");
555      }
556      if (m_confirmationThreshold < 0 || m_confirmationThreshold > 1) {
557        throw new Exception("Confirmation threshold has to be between "
558                            + "zero and one!");
559      }
560      if (bestString.length() != 0) {
561        throw new Exception("Specifying both a number of confirmation "
562                            + "values and a confirmation threshold "
563                            + "doesn't make sense!");
564      }
565      if (m_confirmationThreshold != 0) {
566        m_best = 0;
567      }
568    }
569    String noiseThresholdString = Utils.getOption('N', options);
570    if (noiseThresholdString.length() != 0) {
571      try {
572        m_noiseThreshold = (new Double(noiseThresholdString)).doubleValue();
573      } catch (Exception e) {
574        throw new Exception("Invalid value for -N option: "
575                            + e.getMessage() + ".");
576      }
577      if (m_noiseThreshold < 0 || m_noiseThreshold > 1) {
578        throw new Exception("Noise threshold has to be between "
579                            + "zero and one!");
580      }
581    }
582
583    /* Search space and language bias options. */
584    m_repeat = Utils.getFlag('R', options);
585    String numLiteralsString = Utils.getOption('L', options);
586    if (numLiteralsString.length() != 0) {
587      try {
588        m_numLiterals = Integer.parseInt(numLiteralsString);
589      } catch (Exception e) {
590        throw new Exception("Invalid value for -L option: "
591                            + e.getMessage() + ".");
592      }
593      if (m_numLiterals < 1) {
594        throw new Exception("Number of literals has to be "
595                            + "greater than one!");
596      }
597    }
598    String negationString = Utils.getOption('G', options);
599    if (negationString.length() != 0) {
600      SelectedTag selected;
601      int tag;
602      try {
603        tag = Integer.parseInt(negationString);
604      } catch (Exception e) {
605        throw new Exception("Invalid value for -G option: "
606                            + e.getMessage() + ".");
607      }
608      try {
609        selected = new SelectedTag(tag, TAGS_NEGATION);
610      } catch (Exception e) {
611        throw new Exception("Value for -G option has to be "
612                            + "between zero and three!");
613      }
614      setNegation(selected);
615    }
616    m_classification = Utils.getFlag('S', options);
617    String classIndexString = Utils.getOption('c', options);
618    if (classIndexString.length() != 0) {
619      try {
620        m_classIndex = Integer.parseInt(classIndexString);
621      } catch (Exception e) {
622        throw new Exception("Invalid value for -c option: "
623                            + e.getMessage() + ".");
624      }
625    }
626    m_horn = Utils.getFlag('H', options);
627    if (m_horn && (m_negation != NONE)) {
628      throw new Exception("Considering horn clauses doesn't make sense "
629                          + "if negation allowed!");
630    }
631   
632    /* Subsumption tests options. */
633    m_equivalent = !(Utils.getFlag('E', options));
634    m_sameClause = !(Utils.getFlag('M', options));
635    m_subsumption = !(Utils.getFlag('T', options));
636
637    /* Missing values options. */
638    String missingString = Utils.getOption('I', options);
639    if (missingString.length() != 0) {
640      SelectedTag selected;
641      int tag;
642      try {
643        tag = Integer.parseInt(missingString);
644      } catch (Exception e) {
645        throw new Exception("Invalid value for -I option: "
646                            + e.getMessage() + ".");
647      }
648      try {
649        selected = new SelectedTag(tag, TAGS_MISSING);
650      } catch (Exception e) {
651        throw new Exception("Value for -I option has to be "
652                            + "between zero and two!");
653      }
654      setMissingValues(selected);
655    }
656
657    /* ROC analysis. */
658    m_roc = Utils.getFlag('O', options);
659
660
661    /* Individual-based learning. */
662    m_partsString = Utils.getOption('p', options);
663    if (m_partsString.length() != 0) {
664      Reader reader;
665      try {
666        reader = new BufferedReader(new FileReader(m_partsString));
667      } catch (Exception e) {
668        throw new Exception("Can't open file " + e.getMessage() + ".");
669      }
670      m_parts = new Instances(reader); 
671    }
672
673    /* Values output. */
674    String printValuesString = Utils.getOption('P', options);
675    if (printValuesString.length() != 0) {
676      SelectedTag selected;
677      int tag;
678      try {
679        tag = Integer.parseInt(printValuesString);
680      } catch (Exception e) {
681        throw new Exception("Invalid value for -P option: "
682                            + e.getMessage() + ".");
683      }
684      try {
685        selected = new SelectedTag(tag, TAGS_VALUES);
686      } catch (Exception e) {
687        throw new Exception("Value for -P option has to be "
688                            + "between zero and two!");
689      }
690      setValuesOutput(selected);
691    }
692  }
693
694  /**
695   * Gets the current settings of the Tertius object.
696   *
697   * @return An array of strings suitable for passing to setOptions.
698   */
699  public String [] getOptions() {
700    Vector      result;
701
702    result = new Vector();
703
704    /* Pruning options. */
705    if (m_best > 0) {
706      result.add("-K");
707      result.add("" + m_best);
708    }
709   
710    result.add("-F");
711    result.add("" + m_frequencyThreshold);
712   
713    if (m_confirmationThreshold > 0) {
714      result.add("-C");
715      result.add("" + m_confirmationThreshold);
716    }
717   
718    result.add("-N");
719    result.add("" + m_noiseThreshold);
720   
721    /* Search space and language bias options. */
722    if (m_repeat)
723      result.add("-R");
724   
725    result.add("-L");
726    result.add("" + m_numLiterals);
727   
728    result.add("-G");
729    result.add("" + m_negation);
730   
731    if (m_classification)
732      result.add("-S");
733     
734    result.add("-c");
735    result.add("" + m_classIndex);
736   
737    if (m_horn)
738      result.add("-H");
739
740    /* Subsumption tests options. */
741    if (!m_equivalent)
742      result.add("-E");
743   
744    if (!m_sameClause)
745      result.add("-M");
746   
747    if (!m_subsumption)
748      result.add("-T");
749
750    /* Missing values options. */
751    result.add("-I");
752    result.add("" + m_missing);
753
754    /* ROC analysis. */
755    if (m_roc)
756      result.add("-O");
757
758    /* Individual-based learning. */
759    if (m_partsString.length() > 0) {
760      result.add("-p");
761      result.add("" + m_partsString);
762    }
763
764    /* Values output. */
765    result.add("-P");
766    result.add("" + m_printValues);
767
768    return (String[]) result.toArray(new String[result.size()]);         
769  }
770
771  /**
772   * Returns the tip text for this property.
773   *
774   * @return Tip text for this property suitable for
775   * displaying in the explorer/experimenter GUI.
776   */
777  public String confirmationValuesTipText() {
778
779    return "Number of best confirmation values to find.";
780  }
781
782  /**
783   * Get the value of confirmationValues.
784   *
785   * @return Value of confirmationValues.
786   */
787  public int getConfirmationValues() {
788
789    return m_best;
790  }
791
792  /**
793   * Set the value of confirmationValues.
794   *
795   * @param v  Value to assign to confirmationValues.
796   */
797  public void setConfirmationValues(int v) {
798
799    m_best = v;
800  }
801
802  /**
803   * Returns the tip text for this property.
804   *
805   * @return Tip text for this property suitable for
806   * displaying in the explorer/experimenter GUI.
807   */
808  public String frequencyThresholdTipText() {
809   
810    return "Minimum proportion of instances satisfying head and body of rules";
811  }
812
813  /**
814   * Get the value of frequencyThreshold.
815   *
816   * @return Value of frequencyThreshold.
817   */
818  public double getFrequencyThreshold() {
819   
820    return m_frequencyThreshold;
821  }
822
823  /**
824   * Set the value of frequencyThreshold.
825   *
826   * @param v  Value to assign to frequencyThreshold.
827   */
828  public void setFrequencyThreshold(double v) {
829   
830    m_frequencyThreshold = v;
831  }
832
833  /**
834   * Returns the tip text for this property.
835   *
836   * @return Tip text for this property suitable for
837   * displaying in the explorer/experimenter GUI.
838   */
839  public String confirmationThresholdTipText() {
840   
841    return "Minimum confirmation of the rules.";
842  }
843
844  /**
845   * Get the value of confirmationThreshold.
846   *
847   * @return Value of confirmationThreshold.
848   */
849  public double getConfirmationThreshold() {
850   
851    return m_confirmationThreshold;
852  }
853
854  /**
855   * Set the value of confirmationThreshold.
856   *
857   * @param v  Value to assign to confirmationThreshold.
858   */
859  public void setConfirmationThreshold(double v) {
860   
861    m_confirmationThreshold = v;
862    if (v != 0) {
863      m_best = 0;
864    }
865  }
866
867  /**
868   * Returns the tip text for this property.
869   *
870   * @return Tip text for this property suitable for
871   * displaying in the explorer/experimenter GUI.
872   */
873  public String noiseThresholdTipText() {
874   
875    return "Maximum proportion of counter-instances of rules. "
876      + "If set to 0, only satisfied rules will be given.";
877  }
878
879  /**
880   * Get the value of noiseThreshold.
881   *
882   * @return Value of noiseThreshold.
883   */
884  public double getNoiseThreshold() {
885   
886    return m_noiseThreshold;
887  }
888
889  /**
890   * Set the value of noiseThreshold.
891   *
892   * @param v  Value to assign to noiseThreshold.
893   */
894  public void setNoiseThreshold(double v) {
895   
896    m_noiseThreshold = v;
897  }
898
899  /**
900   * Returns the tip text for this property.
901   *
902   * @return Tip text for this property suitable for
903   * displaying in the explorer/experimenter GUI.
904   */
905  public String repeatLiteralsTipText() {
906
907    return "Repeated attributes allowed.";
908  }
909
910  /**
911   * Get the value of repeatLiterals.
912   *
913   * @return Value of repeatLiterals.
914   */
915  public boolean getRepeatLiterals() {
916   
917    return m_repeat;
918  }
919
920  /**
921   * Set the value of repeatLiterals.
922   *
923   * @param v  Value to assign to repeatLiterals.
924   */
925  public void setRepeatLiterals(boolean v) {
926   
927    m_repeat = v;
928  }
929
930  /**
931   * Returns the tip text for this property.
932   *
933   * @return Tip text for this property suitable for
934   * displaying in the explorer/experimenter GUI.
935   */
936  public String numberLiteralsTipText() {
937   
938    return "Maximum number of literals in a rule.";
939  }
940
941  /**
942   * Get the value of numberLiterals.
943   *
944   * @return Value of numberLiterals.
945   */
946  public int getNumberLiterals() {
947
948    return m_numLiterals;
949  }
950
951  /**
952   * Set the value of numberLiterals.
953   *
954   * @param v  Value to assign to numberLiterals.
955   */
956  public void setNumberLiterals(int v) {
957   
958    m_numLiterals = v;
959  }
960
961  /**
962   * Returns the tip text for this property.
963   *
964   * @return Tip text for this property suitable for
965   * displaying in the explorer/experimenter GUI.
966   */
967  public String negationTipText() {
968   
969    return "Set the type of negation allowed in the rule. "
970      + "Negation can be allowed in the body, in the head, in both "
971      + "or in none.";
972  }
973
974  /**
975   * Get the value of negation.
976   *
977   * @return Value of negation.
978   */
979  public SelectedTag getNegation() {
980   
981    return new SelectedTag(m_negation, TAGS_NEGATION);
982  }
983
984  /**
985   * Set the value of negation.
986   *
987   * @param v  Value to assign to negation.
988   */
989  public void setNegation(SelectedTag v) {
990   
991    if (v.getTags() == TAGS_NEGATION) {
992      m_negation = v.getSelectedTag().getID();
993    }
994  }
995
996  /**
997   * Returns the tip text for this property.
998   *
999   * @return Tip text for this property suitable for
1000   * displaying in the explorer/experimenter GUI.
1001   */
1002  public String classificationTipText() {
1003   
1004    return "Find only rules with the class in the head.";
1005  }
1006
1007  /**
1008   * Get the value of classification.
1009   *
1010   * @return Value of classification.
1011   */
1012  public boolean getClassification() {
1013
1014    return m_classification;
1015  }
1016
1017  /**
1018   * Set the value of classification.
1019   *
1020   * @param v  Value to assign to classification.
1021   */
1022  public void setClassification(boolean v) {
1023
1024    m_classification = v;
1025  }
1026
1027  /**
1028   * Returns the tip text for this property.
1029   *
1030   * @return Tip text for this property suitable for
1031   * displaying in the explorer/experimenter GUI.
1032   */
1033  public String classIndexTipText() {
1034
1035    return "Index of the class attribute. If set to 0, the class will be the last attribute.";
1036  }
1037
1038  /**
1039   * Get the value of classIndex.
1040   *
1041   * @return Value of classIndex.
1042   */
1043  public int getClassIndex() {
1044
1045    return m_classIndex;
1046  }
1047
1048  /**
1049   * Set the value of classIndex.
1050   *
1051   * @param v  Value to assign to classIndex.
1052   */
1053  public void setClassIndex(int v) {
1054
1055    m_classIndex = v;
1056  }
1057
1058  /**
1059   * Returns the tip text for this property.
1060   *
1061   * @return Tip text for this property suitable for
1062   * displaying in the explorer/experimenter GUI.
1063   */
1064  public String hornClausesTipText() {
1065   
1066    return "Find rules with a single conclusion literal only.";
1067  }
1068
1069  /**
1070   * Get the value of hornClauses.
1071   *
1072   * @return Value of hornClauses.
1073   */
1074  public boolean getHornClauses() {
1075
1076    return m_horn;
1077  }
1078
1079  /**
1080   * Set the value of hornClauses.
1081   *
1082   * @param v  Value to assign to hornClauses.
1083   */
1084  public void setHornClauses(boolean v) {
1085
1086    m_horn = v;
1087  }
1088 
1089  /**
1090   * Returns the tip text for this property.
1091   *
1092   * @return Tip text for this property suitable for
1093   * displaying in the explorer/experimenter GUI.
1094   */
1095  public String equivalentTipText() {
1096   
1097    return "Keep equivalent rules. "
1098      + "A rule r2 is equivalent to a rule r1 if the body of r2 is the "
1099      + "negation of the head of r1, and the head of r2 is the "
1100      + "negation of the body of r1.";
1101  }
1102
1103  /**
1104   * Get the value of equivalent.
1105   *
1106   * @return Value of equivalent.
1107   */
1108  public boolean disabled_getEquivalent() {
1109   
1110    return !m_equivalent;
1111  }
1112
1113  /**
1114   * Set the value of equivalent.
1115   *
1116   * @param v  Value to assign to equivalent.
1117   */
1118  public void disabled_setEquivalent(boolean v) {
1119   
1120    m_equivalent = !v;
1121  }
1122
1123  /**
1124   * Returns the tip text for this property.
1125   *
1126   * @return Tip text for this property suitable for
1127   * displaying in the explorer/experimenter GUI.
1128   */
1129  public String sameClauseTipText() {
1130
1131    return "Keep rules corresponding to the same clauses. "
1132      + "If set to false, only the rule with the best confirmation "
1133      + "value and rules with a lower number of counter-instances "
1134      + "will be kept.";
1135  }
1136
1137  /**
1138   * Get the value of sameClause.
1139   *
1140   * @return Value of sameClause.
1141   */
1142  public boolean disabled_getSameClause() {
1143
1144    return !m_sameClause;
1145  }
1146
1147  /**
1148   * Set the value of sameClause.
1149   *
1150   * @param v  Value to assign to sameClause.
1151   */
1152  public void disabled_setSameClause(boolean v) {
1153
1154    m_sameClause = !v;
1155  }
1156
1157  /**
1158   * Returns the tip text for this property.
1159   *
1160   * @return Tip text for this property suitable for
1161   * displaying in the explorer/experimenter GUI.
1162   */
1163  public String subsumptionTipText() {
1164
1165    return "Keep subsumed rules. "
1166      + "If set to false, subsumed rules will only be kept if they "
1167      + "have a better confirmation or a lower number of counter-instances.";
1168  }
1169
1170  /**
1171   * Get the value of subsumption.
1172   *
1173   * @return Value of subsumption.
1174   */
1175  public boolean disabled_getSubsumption() {
1176
1177    return !m_subsumption;
1178  }
1179
1180  /**
1181   * Set the value of subsumption.
1182   *
1183   * @param v  Value to assign to subsumption.
1184   */
1185  public void disabled_setSubsumption(boolean v) {
1186
1187    m_subsumption = !v;
1188  }
1189
1190  /**
1191   * Returns the tip text for this property.
1192   *
1193   * @return Tip text for this property suitable for
1194   * displaying in the explorer/experimenter GUI.
1195   */
1196  public String missingValuesTipText() {
1197   
1198    return "Set the way to handle missing values. "
1199      + "Missing values can be set to match any value, or never match values "
1200      + "or to be significant and possibly appear in rules.";
1201  }
1202
1203  /**
1204   * Get the value of missingValues.
1205   *
1206   * @return Value of missingValues.
1207   */
1208  public SelectedTag getMissingValues() {
1209   
1210    return new SelectedTag(m_missing, TAGS_MISSING);
1211  }
1212
1213  /**
1214   * Set the value of missingValues.
1215   *
1216   * @param v  Value to assign to missingValues.
1217   */
1218  public void setMissingValues(SelectedTag v) {
1219   
1220    if (v.getTags() == TAGS_MISSING) {
1221      m_missing = v.getSelectedTag().getID();
1222    }
1223  }
1224
1225  /**
1226   * Returns the tip text for this property.
1227   *
1228   * @return Tip text for this property suitable for
1229   * displaying in the explorer/experimenter GUI.
1230   */
1231  public String rocAnalysisTipText() {
1232   
1233    return "Return TP-rate and FP-rate for each rule found.";
1234  }
1235
1236  /**
1237   * Get the value of rocAnalysis.
1238   *
1239   * @return Value of rocAnalysis.
1240   */
1241  public boolean getRocAnalysis() {
1242
1243    return m_roc;
1244  }
1245
1246  /**
1247   * Set the value of rocAnalysis.
1248   *
1249   * @param v  Value to assign to rocAnalysis.
1250   */
1251  public void setRocAnalysis(boolean v) {
1252
1253    m_roc = v;
1254  }
1255
1256  /**
1257   * Returns the tip text for this property.
1258   *
1259   * @return Tip text for this property suitable for
1260   * displaying in the explorer/experimenter GUI.
1261   */
1262  public String partFileTipText() {
1263   
1264    return "Set file containing the parts of the individual "
1265      + "for individual-based learning.";
1266  }
1267
1268  /**
1269   * Get the value of partFile.
1270   *
1271   * @return Value of partFile.
1272   */
1273  public File disabled_getPartFile() {
1274
1275    return new File(m_partsString);
1276  }
1277
1278  /**
1279   * Set the value of partFile.
1280   *
1281   * @param v  Value to assign to partFile.
1282   * @throws Exception if file cannot be opened
1283   */
1284  public void disabled_setPartFile(File v) throws Exception {
1285
1286    m_partsString = v.getAbsolutePath();
1287    if (m_partsString.length() != 0) {
1288      Reader reader;
1289      try {
1290        reader = new BufferedReader(new FileReader(m_partsString));
1291      } catch (Exception e) {
1292        throw new Exception("Can't open file " + e.getMessage() + ".");
1293      }
1294      m_parts = new Instances(reader); 
1295    }
1296  }
1297
1298  /**
1299   * Returns the tip text for this property.
1300   *
1301   * @return Tip text for this property suitable for
1302   * displaying in the explorer/experimenter GUI.
1303   */
1304  public String valuesOutputTipText() {
1305   
1306    return "Give visual feedback during the search. "
1307      + "The current best and worst values can be output either to stdout or to a separate window.";
1308  }
1309
1310  /**
1311   * Get the value of valuesOutput.
1312   *
1313   * @return Value of valuesOutput.
1314   */
1315  public SelectedTag getValuesOutput() {
1316   
1317    return new SelectedTag(m_printValues, TAGS_VALUES);
1318  }
1319
1320  /**
1321   * Set the value of valuesOutput.
1322   *
1323   * @param v  Value to assign to valuesOutput.
1324   */
1325  public void setValuesOutput(SelectedTag v) {
1326   
1327    if (v.getTags() == TAGS_VALUES) {
1328      m_printValues = v.getSelectedTag().getID();
1329    }
1330  }
1331
1332  /**
1333   * Build the predicate corresponding to an attribute.
1334   *
1335   * @param instances The instances this predicates comes from.
1336   * @param attr The attribute this predicate corresponds to.
1337   * @param isClass Saying if the attribute is the class attribute.
1338   * @return The corresponding Predicate.
1339   * @throws Exception if the predicate could not be build
1340   * (the attribute is numeric).
1341   */
1342  private Predicate buildPredicate(Instances instances,
1343                                   Attribute attr, boolean isClass) 
1344    throws Exception {
1345
1346    Predicate predicate; /* The result. */
1347    Literal lit;
1348    Literal negation;
1349    boolean missingValues; /* Missing values for this attribute ? */
1350    boolean individual = (m_parts != null); /* Individual-based learning ? */
1351    int type = (instances == m_parts)
1352      ? IndividualLiteral.PART_PROPERTY
1353      : IndividualLiteral.INDIVIDUAL_PROPERTY; /* Type of property. */
1354
1355    if (attr.isNumeric()) {
1356      throw new Exception("Can't handle numeric attributes!");
1357    }
1358       
1359    missingValues = instances.attributeStats(attr.index()).missingCount > 0;
1360
1361    /* Build predicate. */
1362    if (individual) {
1363      predicate = new Predicate(instances.relationName() + "." + attr.name(), 
1364                                attr.index(), isClass);
1365    } else {
1366      predicate = new Predicate(attr.name(), attr.index(), isClass);
1367    }
1368       
1369    if (attr.numValues() == 2
1370        && (!missingValues || m_missing == EXPLICIT)) {
1371      /* Case of two values.
1372       * If there are missing values, this case is treated like other cases.
1373       */
1374      if (individual) {
1375        lit = new IndividualLiteral(predicate, attr.value(0), 0, 
1376                                    Literal.POS, m_missing, type);
1377        negation = new IndividualLiteral(predicate, attr.value(1), 1, 
1378                                         Literal.POS, m_missing, type);
1379      } else {
1380        lit = new AttributeValueLiteral(predicate, attr.value(0), 0, 
1381                                        Literal.POS, m_missing);
1382        negation = new AttributeValueLiteral(predicate, attr.value(1), 1, 
1383                                             Literal.POS, m_missing);
1384      }
1385      lit.setNegation(negation);
1386      negation.setNegation(lit);
1387      predicate.addLiteral(lit);     
1388    } else {
1389      /* Case of several values. */
1390      for (int i = 0; i < attr.numValues(); i++) {
1391        if (individual) {
1392          lit = new IndividualLiteral(predicate, attr.value(i), i,
1393                                      Literal.POS, m_missing, type);
1394        } else {
1395          lit = new AttributeValueLiteral(predicate, attr.value(i), i, 
1396                                          Literal.POS, m_missing);
1397        }
1398        if (m_negation != NONE) {
1399          if (individual) {
1400            negation = new IndividualLiteral(predicate, attr.value(i), i, 
1401                                             Literal.NEG, m_missing, type);
1402          } else {
1403            negation = new AttributeValueLiteral(predicate, attr.value(i), i, 
1404                                                 Literal.NEG, m_missing);
1405          }
1406          lit.setNegation(negation);
1407          negation.setNegation(lit);
1408        }
1409        predicate.addLiteral(lit);
1410      }
1411
1412      /* One more value if missing is significant. */
1413      if (missingValues && m_missing == SIGNIFICANT) {
1414        if (individual) {
1415          lit = new IndividualLiteral(predicate, "?", -1, 
1416                                      Literal.POS, m_missing, type);
1417        } else {
1418          lit = new AttributeValueLiteral(predicate, "?", -1, 
1419                                          Literal.POS, m_missing);
1420        }
1421        if (m_negation != NONE) {
1422          if (individual) {
1423            negation = new IndividualLiteral(predicate, "?", -1, 
1424                                             Literal.NEG, m_missing, type);
1425          } else {
1426            negation = new AttributeValueLiteral(predicate, "?", -1, 
1427                                                 Literal.NEG, m_missing);
1428          }
1429          lit.setNegation(negation);
1430          negation.setNegation(lit);
1431        }
1432        predicate.addLiteral(lit);
1433      }
1434    }
1435    return predicate;
1436  }
1437   
1438  /**
1439   * Build the predicates to use in the rules.
1440   *
1441   * @return the predicates
1442   * @throws Exception If the predicates could not be built
1443   * (numeric attribute).
1444   */
1445  private ArrayList buildPredicates() throws Exception {
1446
1447    ArrayList predicates = new ArrayList(); /* The result. */
1448    Predicate predicate;
1449    Attribute attr;
1450    Enumeration attributes = m_instances.enumerateAttributes();
1451    boolean individual = (m_parts != null); /* Individual-based learning ? */
1452
1453    /* Attributes. */
1454    while (attributes.hasMoreElements()) {
1455      attr = (Attribute) attributes.nextElement();
1456      /* Identifiers do not appear in rules in individual-based learning. */
1457      if (!(individual && attr.name().equals("id"))) {
1458        predicate = buildPredicate(m_instances, attr, false);
1459        predicates.add(predicate);
1460      }
1461    }
1462    /* Class attribute. */
1463    attr = m_instances.classAttribute();
1464    /* Identifiers do not appear in rules. */
1465    if (!(individual && attr.name().equals("id"))) {
1466      predicate = buildPredicate(m_instances, attr, true);
1467      predicates.add(predicate);
1468    }
1469
1470    /* Attributes of the parts in individual-based learning. */
1471    if (individual) {
1472      attributes = m_parts.enumerateAttributes();
1473      while (attributes.hasMoreElements()) {
1474        attr = (Attribute) attributes.nextElement();
1475        /* Identifiers do not appear in rules. */
1476        if (!attr.name().equals("id")) {
1477          predicate = buildPredicate(m_parts, attr, false);
1478          predicates.add(predicate);
1479        }
1480      }
1481    }
1482       
1483    return predicates;
1484  }
1485
1486  /**
1487   * Count the number of distinct confirmation values in the results.
1488   *
1489   * @return Number of confirmation values in the results.
1490   */
1491  private int numValuesInResult() {
1492
1493    int result = 0;
1494    SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1495    Rule current;
1496    Rule next;
1497    if (!iter.hasNext()) {
1498      return result;
1499    } else {
1500      current = (Rule) iter.next();
1501      while (iter.hasNext()) {
1502        next = (Rule) iter.next();
1503        if (current.getConfirmation() > next.getConfirmation()) {
1504          result++;
1505        }
1506        current = next;
1507      }
1508      return result + 1;
1509    }
1510  }
1511
1512  /**
1513   * Test if it is worth refining a rule.
1514   *
1515   * @param rule The rule to consider.
1516   * @return True if the rule can be refined, false otherwise.
1517   */
1518  private boolean canRefine(Rule rule) {
1519    if (rule.isEmpty()) {
1520      return true;
1521    }
1522    if (m_best != 0) {
1523      if (numValuesInResult() < m_best) {
1524        return true;
1525      }
1526      Rule worstResult = (Rule) m_results.getLast();
1527      if (rule.getOptimistic() >= worstResult.getConfirmation()) {
1528        return true;
1529      }
1530      return false;
1531    } else {
1532      return true;
1533    }
1534  }
1535
1536  /**
1537   * Test if it is worth calculating the optimistic estimate of a rule.
1538   *
1539   * @param rule The rule to consider.
1540   * @return True if the optimistic estimate can be calculated, false otherwise.
1541   */
1542  private boolean canCalculateOptimistic(Rule rule) {
1543    if (rule.hasTrueBody() || rule.hasFalseHead()) {
1544      return false;
1545    }
1546    if (!rule.overFrequencyThreshold(m_frequencyThreshold)) {
1547      return false;
1548    }
1549    return true;
1550  }
1551
1552  /**
1553   * Test if a rule can be explored (if it is interesting for the results
1554   * or for refining).
1555   *
1556   * @param rule The rule to consider.
1557   * @return True if the rule can be explored, false otherwise.
1558   */
1559  private boolean canExplore(Rule rule) {
1560    if (rule.getOptimistic() < m_confirmationThreshold) {
1561      return false;
1562    }
1563    if (m_best != 0) {
1564      if (numValuesInResult() < m_best) {
1565        return true;
1566      } 
1567      Rule worstResult = (Rule) m_results.getLast();
1568      if (rule.getOptimistic() >= worstResult.getConfirmation()) {
1569        return true;
1570      }
1571      return false;     
1572    } else {
1573      return true;
1574    }
1575  }   
1576
1577  /**
1578   * Test if a rule can be stored in the agenda.
1579   *
1580   * @param rule The rule to consider.
1581   * @return True if the rule can be stored, false otherwise.
1582   */
1583  private boolean canStoreInNodes(Rule rule) {
1584    if (rule.getObservedNumber() == 0) {
1585      return false;
1586    }
1587    return true;
1588  }
1589
1590  /**
1591   * Test if it is worth calculating the confirmation of a rule.
1592   *
1593   * @param rule The rule to consider.
1594   * @return True if the confirmation can be calculated, false otherwise.
1595   */
1596  private boolean canCalculateConfirmation(Rule rule) {
1597    if (rule.getObservedFrequency() > m_noiseThreshold) {
1598      return false;
1599    }
1600    return true;
1601  }
1602
1603  /**
1604   * Test if a rule can be added to the results.
1605   *
1606   * @param rule The rule to consider.
1607   * @return True if the rule can be stored, false otherwise.
1608   */
1609  private boolean canStoreInResults(Rule rule) {
1610    if (rule.getConfirmation() < m_confirmationThreshold) {
1611      return false;
1612    }
1613    if (m_best != 0) {
1614      if (numValuesInResult() < m_best) {
1615        return true;
1616      }
1617      Rule worstResult = (Rule) m_results.getLast();
1618      if (rule.getConfirmation() >= worstResult.getConfirmation()) {
1619        return true;
1620      }
1621      return false;   
1622    } else {
1623      return true;
1624    }
1625  }
1626
1627  /**
1628   * Add a rule in the appropriate place in the list of the results,
1629   * according to the confirmation and
1630   * number of counter-instances of the rule. <p>
1631   * Subsumption tests are performed and the rule may not be added. <p>
1632   * Previous results may also be removed because of sumption.
1633   */
1634  private void addResult(Rule rule) {
1635
1636    Rule current;
1637    boolean added = false;
1638
1639    /* Iterate the list until we find the right place. */
1640    SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1641    while (iter.hasNext()) {
1642      current = (Rule) iter.next();
1643      if (Rule.confirmationThenObservedComparator.compare(current, rule) > 0) {
1644        iter.addBefore(rule);
1645        added = true;
1646        break;
1647      }
1648      /* Subsumption tests to see if the rule can be added. */
1649      if ((m_subsumption || m_sameClause || m_equivalent)
1650          && current.subsumes(rule)) {
1651        if (current.numLiterals() == rule.numLiterals()) {
1652          if (current.equivalentTo(rule)) {
1653            /* Equivalent rules. */
1654            if (m_equivalent) {
1655              return;
1656            }
1657          } else {
1658            /* Same clauses. */
1659            if (m_sameClause
1660                && Rule.confirmationComparator.compare(current, rule) < 0) {
1661              return;
1662            }
1663          }
1664        } else {
1665          /* Subsumption. */
1666          if (m_subsumption
1667              && Rule.observedComparator.compare(current, rule) <= 0) { 
1668            return;
1669          }
1670        }
1671      }
1672    }
1673
1674    if (added == false) {
1675      /* The rule must be added in the end of the results. */
1676      m_results.add(rule);
1677    }
1678
1679    /* Iterate the results with a lower confirmation
1680     *  to see if some of them must be removed. */
1681    SimpleLinkedList.LinkedListInverseIterator inverse
1682      = m_results.inverseIterator();
1683    while (inverse.hasPrevious()) {
1684      current = (Rule) inverse.previous();
1685      if (Rule.confirmationThenObservedComparator.compare(current, rule) < 0) {
1686        break;
1687      }
1688      if (current != rule && rule.subsumes(current)) {
1689        if (current.numLiterals() == rule.numLiterals()) {
1690          if (!current.equivalentTo(rule)) {
1691            /* Same clauses. */
1692            if (m_sameClause
1693                && Rule.confirmationComparator.compare(current, rule) > 0) {
1694              inverse.remove();
1695            }
1696          }
1697        } else {
1698          /* Subsumption. */
1699          if (m_subsumption
1700              && Rule.observedComparator.compare(rule, current) <= 0) { 
1701            inverse.remove();
1702          }
1703        }       
1704      }
1705    }
1706
1707    /* Remove the rules with the worst confirmation value
1708     * if there are too many results. */
1709    if (m_best != 0 && numValuesInResult() > m_best) {
1710      Rule worstRule = (Rule) m_results.getLast();
1711      inverse = m_results.inverseIterator();
1712      while (inverse.hasPrevious()) {
1713        current = (Rule) inverse.previous();
1714        if (Rule.confirmationComparator.compare(current, worstRule) < 0) {
1715          break;
1716        }
1717        inverse.remove();
1718      }
1719    }
1720
1721    /* Print the new current values. */
1722    printValues();
1723  }
1724
1725  /**
1726   * Returns default capabilities of the classifier.
1727   *
1728   * @return      the capabilities of this classifier
1729   */
1730  public Capabilities getCapabilities() {
1731    Capabilities result = super.getCapabilities();
1732    result.disableAll();
1733
1734    // attributes
1735    result.enable(Capability.NOMINAL_ATTRIBUTES);
1736    result.enable(Capability.MISSING_VALUES);
1737
1738    // class
1739    result.enable(Capability.NOMINAL_CLASS);
1740    result.enable(Capability.MISSING_CLASS_VALUES);
1741   
1742    return result;
1743  }
1744 
1745  /**
1746   * Method that launches the search to find the rules with the highest
1747   * confirmation.
1748   *
1749   * @param instances The instances to be used for generating the rules.
1750   * @throws Exception if rules can't be built successfully.
1751   */
1752  public void buildAssociations(Instances instances) throws Exception {
1753
1754    Frame valuesFrame = null; /* Frame to display the current values. */
1755
1756    /* Initialization of the search. */
1757    if (m_parts == null) {
1758      m_instances = new Instances(instances);
1759    } else {
1760      m_instances = new IndividualInstances(new Instances(instances), m_parts);
1761    }   
1762    m_results = new SimpleLinkedList();
1763    m_hypotheses = 0;
1764    m_explored = 0;
1765    m_status = NORMAL;
1766
1767    if (m_classIndex == -1)
1768      m_instances.setClassIndex(m_instances.numAttributes()-1);     
1769    else if (m_classIndex < m_instances.numAttributes() && m_classIndex >= 0)
1770      m_instances.setClassIndex(m_classIndex);
1771    else
1772      throw new Exception("Invalid class index.");
1773   
1774    // can associator handle the data?
1775    getCapabilities().testWithFail(m_instances);
1776   
1777    /* Initialization of the window for current values. */
1778    if (m_printValues == WINDOW) {
1779      m_valuesText = new TextField(37);
1780      m_valuesText.setEditable(false);
1781      m_valuesText.setFont(new Font("Monospaced", Font.PLAIN, 12));   
1782      Label valuesLabel = new Label("Best and worst current values:");
1783      Button stop = new Button("Stop search");
1784      stop.addActionListener(new ActionListener() {
1785          public void actionPerformed(ActionEvent e) {
1786            /* Signal the interruption to the search. */
1787            m_status = STOP;
1788          }
1789        });
1790      valuesFrame = new Frame("Tertius status");
1791      valuesFrame.setResizable(false);
1792      valuesFrame.add(m_valuesText, BorderLayout.CENTER);
1793      valuesFrame.add(stop, BorderLayout.SOUTH);
1794      valuesFrame.add(valuesLabel, BorderLayout.NORTH);
1795      valuesFrame.pack();
1796      valuesFrame.setVisible(true);
1797    } else if (m_printValues == OUT) {
1798      System.out.println("Best and worst current values:");
1799    }
1800     
1801    Date start = new Date();
1802
1803    /* Build the predicates and launch the search. */
1804    m_predicates = buildPredicates();
1805    beginSearch();   
1806
1807    Date end = new Date();
1808
1809    if (m_printValues == WINDOW) {
1810      valuesFrame.dispose();
1811    }
1812
1813    m_time = new Date(end.getTime() - start.getTime());
1814  }
1815
1816  /**
1817   * Run the search.
1818   */
1819  public void run() {
1820    try {
1821      search();
1822    } catch (OutOfMemoryError e) {
1823      /* Garbage collect what can be collected to be able to continue. */
1824      System.gc();
1825      m_status = MEMORY;
1826    }
1827    endSearch();
1828  }
1829
1830  /**
1831   * Begin the search by starting a new thread.
1832   */
1833  private synchronized void beginSearch() throws Exception {
1834    /* This method must be synchronized to be able to
1835     * call the wait() method. */
1836    Thread search = new Thread(this);
1837    search.start();
1838    try {
1839      /* Wait for the end of the thread. */
1840      wait();
1841    } catch (InterruptedException e) {
1842      /* Signal the interruption to the search. */
1843      m_status = STOP;
1844    }
1845  }
1846
1847  /**
1848   * End the search by notifying to the waiting thread that it is finished.
1849   */
1850  private synchronized void endSearch() {
1851    /* This method must be synchronized to be able to
1852     * call the notify() method. */
1853    notify();
1854  }
1855
1856  /**
1857   * Search in the space of hypotheses the rules that have the highest
1858   * confirmation.
1859   * The search starts with the empty rule, other rules are generated by
1860   * refinement.
1861   */
1862  public void search() {
1863
1864    SimpleLinkedList nodes = new SimpleLinkedList(); /* The agenda. */
1865    Rule currentNode;
1866    SimpleLinkedList children;
1867    SimpleLinkedList.LinkedListIterator iter;
1868    Rule child;
1869    boolean negBody = (m_negation == BODY || m_negation == ALL);
1870    boolean negHead = (m_negation == HEAD || m_negation == ALL);
1871
1872    /* Start with the empty rule. */
1873      nodes.add(new Rule(m_repeat, m_numLiterals, negBody, negHead,
1874                         m_classification, m_horn));
1875   
1876    /* Print the current values. */
1877    printValues();
1878
1879    /* Explore the rules in the agenda. */
1880    while (m_status != STOP && !nodes.isEmpty()) {
1881      currentNode = (Rule) nodes.removeFirst();
1882      if (canRefine(currentNode)) {
1883        children = currentNode.refine(m_predicates);
1884        iter = children.iterator();
1885        /* Calculate the optimistic estimate of the children and
1886         * consider them for adding to the agenda and to the results. */
1887        while (iter.hasNext()) {
1888          m_hypotheses++;
1889          child = (Rule) iter.next();
1890          child.upDate(m_instances);
1891          if (canCalculateOptimistic(child)) {
1892            child.calculateOptimistic();
1893            if (canExplore(child)) {
1894              m_explored++;
1895              if (canStoreInNodes(child)) {
1896              } else {
1897                iter.remove();
1898              }
1899              if (canCalculateConfirmation(child)) {
1900                child.calculateConfirmation();
1901                if (canStoreInResults(child)) {
1902                  addResult(child);
1903                }         
1904              }
1905            } else {
1906              iter.remove();
1907            }
1908          } else {
1909            iter.remove();
1910          }
1911        }
1912        /* Since the agenda is already sorted it is more efficient
1913         * to sort the children only and then merge. */
1914        children.sort(Rule.optimisticThenObservedComparator);
1915        nodes.merge(children, Rule.optimisticThenObservedComparator);
1916      } else {
1917        /* The agenda being sorted, it is not worth considering the following
1918         * nodes. */
1919        break;
1920      }
1921    }
1922  }
1923
1924  /**
1925   * returns the results
1926   *
1927   * @return            the results
1928   */
1929  public SimpleLinkedList getResults() {
1930    return m_results;
1931  }
1932
1933  /**
1934   * Print the current best and worst values.
1935   */
1936  private void printValues() {
1937
1938    if (m_printValues == NO) {
1939      return;
1940    } else {
1941      if (m_results.isEmpty()) {
1942        if (m_printValues == OUT) {
1943          System.out.print("0.000000 0.000000 - 0.000000 0.000000");
1944        } else { //m_printValues == WINDOW
1945          m_valuesText.setText("0.000000 0.000000 - 0.000000 0.000000");
1946        }
1947      } else {
1948        Rule best = (Rule) m_results.getFirst();
1949        Rule worst = (Rule) m_results.getLast();
1950        String values = best.valuesToString() + " - " + worst.valuesToString();
1951        if (m_printValues == OUT) {
1952          System.out.print("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
1953                           + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
1954          System.out.print(values);
1955        } else { //m_printValues == WINDOW
1956          m_valuesText.setText(values);
1957        }
1958      }
1959    }
1960  }
1961
1962  /**
1963   * Outputs the best rules found with their confirmation value and number
1964   * of counter-instances.
1965   * Also gives the number of hypotheses considered and explored, and the
1966   * time needed.
1967   */
1968  public String toString() {
1969
1970    StringBuffer text = new StringBuffer();
1971    SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1972    int size = m_results.size();
1973    int i = 0;
1974
1975    text.append("\nTertius\n=======\n\n");
1976
1977    while (iter.hasNext()) {
1978      Rule current = (Rule) iter.next();
1979      text.append(Utils.doubleToString((double) i + 1,
1980                                       (int) (Math.log(size) 
1981                                              / Math.log(10) + 1),
1982                                       0)
1983                  + ". ");
1984      text.append("/* ");
1985      if (m_roc) {
1986        text.append(current.rocToString());
1987      } else {
1988        text.append(current.valuesToString());
1989      }
1990      text.append(" */ ");
1991      text.append(current.toString());
1992      text.append("\n");
1993      i++;
1994    }
1995 
1996    text.append("\nNumber of hypotheses considered: " + m_hypotheses);
1997    text.append("\nNumber of hypotheses explored: " + m_explored);
1998
1999    if (m_status == MEMORY) {
2000      text.append("\n\nNot enough memory to continue the search");
2001    } else if (m_status == STOP) {
2002      text.append("\n\nSearch interrupted");
2003    }
2004
2005    return text.toString();
2006  }
2007 
2008  /**
2009   * Returns the revision string.
2010   *
2011   * @return            the revision
2012   */
2013  public String getRevision() {
2014    return RevisionUtils.extract("$Revision: 5444 $");
2015  }
2016
2017  /**
2018   * Main method.
2019   *
2020   * @param args the commandline parameters
2021   */
2022  public static void main(String [] args) {
2023    runAssociator(new Tertius(), args);
2024  }
2025}
Note: See TracBrowser for help on using the repository browser.