Context Navigation

source: src/main/java/weka/associations/Tertius.java @ 9

Last change on this file since 9 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 53.4 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Tertius.java
19	* Copyright (C) 2003 Peter A. Flach, Nicolas Lachiche
20	*
21	* Thanks to Amelie Deltour for porting the original C code to Java
22	* and integrating it into Weka.
23	*/
24
25	package weka.associations;
26
27	import weka.associations.tertius.AttributeValueLiteral;
28	import weka.associations.tertius.IndividualInstances;
29	import weka.associations.tertius.IndividualLiteral;
30	import weka.associations.tertius.Literal;
31	import weka.associations.tertius.Predicate;
32	import weka.associations.tertius.Rule;
33	import weka.associations.tertius.SimpleLinkedList;
34	import weka.core.Attribute;
35	import weka.core.Capabilities;
36	import weka.core.Instances;
37	import weka.core.Option;
38	import weka.core.OptionHandler;
39	import weka.core.RevisionUtils;
40	import weka.core.SelectedTag;
41	import weka.core.Tag;
42	import weka.core.TechnicalInformation;
43	import weka.core.TechnicalInformationHandler;
44	import weka.core.Utils;
45	import weka.core.Capabilities.Capability;
46	import weka.core.TechnicalInformation.Field;
47	import weka.core.TechnicalInformation.Type;
48
49	import java.awt.BorderLayout;
50	import java.awt.Button;
51	import java.awt.Font;
52	import java.awt.Frame;
53	import java.awt.Label;
54	import java.awt.TextField;
55	import java.awt.event.ActionEvent;
56	import java.awt.event.ActionListener;
57	import java.io.BufferedReader;
58	import java.io.File;
59	import java.io.FileReader;
60	import java.io.Reader;
61	import java.util.ArrayList;
62	import java.util.Date;
63	import java.util.Enumeration;
64	import java.util.Vector;
65
66	/**
67	<!-- globalinfo-start -->
68	* Finds rules according to confirmation measure (Tertius-type algorithm).<br/>
69	* <br/>
70	* For more information see:<br/>
71	* <br/>
72	* P. A. Flach, N. Lachiche (1999). Confirmation-Guided Discovery of first-order rules with Tertius. Machine Learning. 42:61-95.
73	* <p/>
74	<!-- globalinfo-end -->
75	*
76	<!-- technical-bibtex-start -->
77	* BibTeX:
78	* <pre>
79	* @article{Flach1999,
80	* author = {P. A. Flach and N. Lachiche},
81	* journal = {Machine Learning},
82	* pages = {61-95},
83	* title = {Confirmation-Guided Discovery of first-order rules with Tertius},
84	* volume = {42},
85	* year = {1999}
86	* }
87	* </pre>
88	* <p/>
89	<!-- technical-bibtex-end -->
90	*
91	<!-- options-start -->
92	* Valid options are: <p/>
93	*
94	* <pre> -K <number of values in result>
95	* Set maximum number of confirmation values in the result. (default: 10)</pre>
96	*
97	* <pre> -F <frequency threshold>
98	* Set frequency threshold for pruning. (default: 0)</pre>
99	*
100	* <pre> -C <confirmation threshold>
101	* Set confirmation threshold. (default: 0)</pre>
102	*
103	* <pre> -N <noise threshold>
104	* Set noise threshold : maximum frequency of counter-examples.
105	* 0 gives only satisfied rules. (default: 1)</pre>
106	*
107	* <pre> -R
108	* Allow attributes to be repeated in a same rule.</pre>
109	*
110	* <pre> -L <number of literals>
111	* Set maximum number of literals in a rule. (default: 4)</pre>
112	*
113	* <pre> -G <0=no negation \| 1=body \| 2=head \| 3=body and head>
114	* Set the negations in the rule. (default: 0)</pre>
115	*
116	* <pre> -S
117	* Consider only classification rules.</pre>
118	*
119	* <pre> -c <class index>
120	* Set index of class attribute. (default: last).</pre>
121	*
122	* <pre> -H
123	* Consider only horn clauses.</pre>
124	*
125	* <pre> -E
126	* Keep equivalent rules.</pre>
127	*
128	* <pre> -M
129	* Keep same clauses.</pre>
130	*
131	* <pre> -T
132	* Keep subsumed rules.</pre>
133	*
134	* <pre> -I <0=always match \| 1=never match \| 2=significant>
135	* Set the way to handle missing values. (default: 0)</pre>
136	*
137	* <pre> -O
138	* Use ROC analysis. </pre>
139	*
140	* <pre> -p <name of file>
141	* Set the file containing the parts of the individual for individual-based learning.</pre>
142	*
143	* <pre> -P <0=no output \| 1=on stdout \| 2=in separate window>
144	* Set output of current values. (default: 0)</pre>
145	*
146	<!-- options-end -->
147	*
148	* @author <a href="mailto:adeltour@netcourrier.com">Amelie Deltour</a>
149	* @version $Revision: 5444 $
150	*/
151
152	public class Tertius
153	extends AbstractAssociator
154	implements OptionHandler, Runnable, TechnicalInformationHandler {
155
156	/** for serialization */
157	static final long serialVersionUID = 5556726848380738179L;
158
159	/** The results. */
160	private SimpleLinkedList m_results;
161
162	/** Number of hypotheses considered. */
163	private int m_hypotheses;
164
165	/** Number of hypotheses explored. */
166	private int m_explored;
167
168	/** Time needed for the search. */
169	private Date m_time;
170
171	/** Field to output the current values. */
172	private TextField m_valuesText;
173
174	/** Instances used for the search. */
175	private Instances m_instances;
176
177	/** Predicates used in the rules. */
178	private ArrayList m_predicates;
179
180	/** Status of the search. */
181	private int m_status;
182	/** Status of the search: normal */
183	private static final int NORMAL = 0;
184	/** Status of the search: memory problem */
185	private static final int MEMORY = 1;
186	/** Status of the search: user interruption */
187	private static final int STOP = 2;
188
189	/* Pruning options. */
190
191	/** Number of best confirmation values to search. */
192	private int m_best;
193
194	/** Frequency threshold for the body and the negation of the head. */
195	private double m_frequencyThreshold;
196
197	/** Confirmation threshold for the rules. */
198	private double m_confirmationThreshold;
199
200	/** Maximal number of counter-instances. */
201	private double m_noiseThreshold;
202
203	/* Search space & language bias options. */
204
205	/** Repeat attributes ? */
206	private boolean m_repeat;
207
208	/** Number of literals in a rule. */
209	private int m_numLiterals;
210
211	/** Type of negation: none */
212	private static final int NONE = 0;
213	/** Type of negation: body */
214	private static final int BODY = 1;
215	/** Type of negation: head */
216	private static final int HEAD = 2;
217	/** Type of negation: all */
218	private static final int ALL = 3;
219	/** Types of negation. */
220	private static final Tag [] TAGS_NEGATION = {
221	new Tag(NONE, "None"),
222	new Tag(BODY, "Body"),
223	new Tag(HEAD, "Head"),
224	new Tag(ALL, "Both")
225	};
226
227	/** Type of negation used in the rules. */
228	private int m_negation;
229
230	/** Classification bias. */
231	private boolean m_classification;
232
233	/** Index of class attribute. */
234	private int m_classIndex;
235
236	/** Horn clauses bias. */
237	private boolean m_horn;
238
239	/* Subsumption tests options. */
240
241	/** Perform test on equivalent rules ? */
242	private boolean m_equivalent;
243
244	/** Perform test on same clauses ? */
245	private boolean m_sameClause;
246
247	/** Perform subsumption test ? */
248	private boolean m_subsumption;
249
250	/** Way of handling missing values: min counterinstances */
251	public static final int EXPLICIT = 0;
252	/** Way of handling missing values: max counterinstances */
253	public static final int IMPLICIT = 1;
254	/** Way of handling missing values: missing as a particular value */
255	public static final int SIGNIFICANT = 2;
256	/** Ways of handling missing values. */
257	private static final Tag [] TAGS_MISSING = {
258	new Tag(EXPLICIT, "Matches all"),
259	new Tag(IMPLICIT, "Matches none"),
260	new Tag(SIGNIFICANT, "Significant")
261	};
262
263	/** Way of handling missing values in the search. */
264	private int m_missing;
265
266	/** Perform ROC analysis ? */
267	private boolean m_roc;
268
269	/** Name of the file containing the parts for individual-based learning. */
270	private String m_partsString;
271
272	/** Part instances for individual-based learning. */
273	private Instances m_parts;
274
275	/** Type of values output: No */
276	private static final int NO = 0;
277	/** Type of values output: stdout */
278	private static final int OUT = 1;
279	/** Type of values output: Window */
280	private static final int WINDOW = 2;
281	/** Types of values output. */
282	private static final Tag [] TAGS_VALUES = {
283	new Tag(NO, "No"),
284	new Tag(OUT, "stdout"),
285	new Tag(WINDOW, "Window")
286	};
287
288	/** Type of values output. */
289	private int m_printValues;
290
291	/**
292	* Constructor that sets the options to the default values.
293	*/
294	public Tertius() {
295
296	resetOptions();
297	}
298
299	/**
300	* Returns a string describing this associator.
301	*
302	* @return A description of the evaluator suitable for
303	* displaying in the explorer/experimenter gui.
304	*/
305	public String globalInfo() {
306	return
307	"Finds rules according to confirmation measure (Tertius-type "
308	+ "algorithm).\n\n"
309	+ "For more information see:\n\n"
310	+ getTechnicalInformation().toString();
311	}
312
313	/**
314	* Returns an instance of a TechnicalInformation object, containing
315	* detailed information about the technical background of this class,
316	* e.g., paper reference or book this class is based on.
317	*
318	* @return the technical information about this class
319	*/
320	public TechnicalInformation getTechnicalInformation() {
321	TechnicalInformation result;
322
323	result = new TechnicalInformation(Type.ARTICLE);
324	result.setValue(Field.AUTHOR, "P. A. Flach and N. Lachiche");
325	result.setValue(Field.YEAR, "1999");
326	result.setValue(Field.TITLE, "Confirmation-Guided Discovery of first-order rules with Tertius");
327	result.setValue(Field.JOURNAL, "Machine Learning");
328	result.setValue(Field.VOLUME, "42");
329	result.setValue(Field.PAGES, "61-95");
330
331	return result;
332	}
333
334	/**
335	* Resets the options to the default values.
336	*/
337	public void resetOptions() {
338
339	/* Pruning options. */
340	m_best = 10;
341	m_frequencyThreshold = 0;
342	m_confirmationThreshold = 0;
343	m_noiseThreshold = 1;
344
345	/* Search space & language bias options. */
346	m_repeat = false;
347	m_numLiterals = 4;
348	m_negation = NONE;
349	m_classification = false;
350	m_classIndex = 0;
351	m_horn = false;
352
353	/* Subsumption tests options. */
354	m_equivalent = true;
355	m_sameClause = true;
356	m_subsumption = true;
357
358	/* Missing values. */
359	m_missing = EXPLICIT;
360
361	/* ROC analysis. */
362	m_roc = false;
363
364	/* Individual-based learning. */
365	m_partsString = "";
366	m_parts = null;
367
368	/* Values output. */
369	m_printValues = NO;
370	}
371
372	/**
373	* Returns an enumeration describing the available options.
374	*
375	* @return An enumeration of all the available options.
376	*/
377	public Enumeration listOptions() {
378
379	Vector newVector = new Vector(17);
380
381	/* Pruning options. */
382	newVector.addElement(new Option("\tSet maximum number of confirmation "
383	+ "values in the result. (default: 10)",
384	"K", 1, "-K <number of values in result>"));
385	newVector.addElement(new Option("\tSet frequency threshold for pruning. "
386	+ "(default: 0)",
387	"F", 1, "-F <frequency threshold>"));
388	newVector.addElement(new Option("\tSet confirmation threshold. "
389	+ "(default: 0)",
390	"C", 1, "-C <confirmation threshold>"));
391	newVector.addElement(new Option("\tSet noise threshold : maximum frequency "
392	+ "of counter-examples.\n\t0 gives only "
393	+ "satisfied rules. (default: 1)",
394	"N", 1, "-N <noise threshold>"));
395
396	/* Search space & language bias options. */
397	newVector.addElement(new Option("\tAllow attributes to be repeated in a "
398	+ "same rule.",
399	"R", 0, "-R"));
400	newVector.addElement(new Option("\tSet maximum number of literals in a "
401	+ "rule. (default: 4)",
402	"L", 1, "-L <number of literals>"));
403	newVector.addElement(new Option("\tSet the negations in the rule. "
404	+ "(default: 0)",
405	"G", 1, "-G <0=no negation \| "
406	+ "1=body \| "
407	+ "2=head \| "
408	+ "3=body and head>"));
409	newVector.addElement(new Option("\tConsider only classification rules.",
410	"S", 0, "-S"));
411	newVector.addElement(new Option("\tSet index of class attribute. "
412	+ "(default: last).",
413	"c", 1, "-c <class index>"));
414	newVector.addElement(new Option("\tConsider only horn clauses.",
415	"H", 0, "-H"));
416
417	/* Subsumption tests options. */
418	newVector.addElement(new Option("\tKeep equivalent rules.",
419	"E", 0, "-E"));
420	newVector.addElement(new Option("\tKeep same clauses.",
421	"M", 0, "-M"));
422	newVector.addElement(new Option("\tKeep subsumed rules.",
423	"T", 0, "-T"));
424
425	/* Missing values options. */
426	newVector.addElement(new Option("\tSet the way to handle missing values. "
427	+ "(default: 0)",
428	"I", 1, "-I <0=always match \| "
429	+ "1=never match \| "
430	+ "2=significant>"));
431
432	/* ROC analysis. */
433	newVector.addElement(new Option("\tUse ROC analysis. ",
434	"O", 0, "-O"));
435
436	/* Individual-based learning. */
437	newVector.addElement(new Option("\tSet the file containing the parts of "
438	+ "the individual for individual-based "
439	+ "learning.",
440	"p", 1, "-p <name of file>"));
441
442	/* Values output. */
443	newVector.addElement(new Option("\tSet output of current values. "
444	+ "(default: 0)",
445	"P", 1, "-P <0=no output \| "
446	+ "1=on stdout \| "
447	+ "2=in separate window>"));
448
449	return newVector.elements();
450	}
451
452	/**
453	* Parses a given list of options. <p/>
454	*
455	<!-- options-start -->
456	* Valid options are: <p/>
457	*
458	* <pre> -K <number of values in result>
459	* Set maximum number of confirmation values in the result. (default: 10)</pre>
460	*
461	* <pre> -F <frequency threshold>
462	* Set frequency threshold for pruning. (default: 0)</pre>
463	*
464	* <pre> -C <confirmation threshold>
465	* Set confirmation threshold. (default: 0)</pre>
466	*
467	* <pre> -N <noise threshold>
468	* Set noise threshold : maximum frequency of counter-examples.
469	* 0 gives only satisfied rules. (default: 1)</pre>
470	*
471	* <pre> -R
472	* Allow attributes to be repeated in a same rule.</pre>
473	*
474	* <pre> -L <number of literals>
475	* Set maximum number of literals in a rule. (default: 4)</pre>
476	*
477	* <pre> -G <0=no negation \| 1=body \| 2=head \| 3=body and head>
478	* Set the negations in the rule. (default: 0)</pre>
479	*
480	* <pre> -S
481	* Consider only classification rules.</pre>
482	*
483	* <pre> -c <class index>
484	* Set index of class attribute. (default: last).</pre>
485	*
486	* <pre> -H
487	* Consider only horn clauses.</pre>
488	*
489	* <pre> -E
490	* Keep equivalent rules.</pre>
491	*
492	* <pre> -M
493	* Keep same clauses.</pre>
494	*
495	* <pre> -T
496	* Keep subsumed rules.</pre>
497	*
498	* <pre> -I <0=always match \| 1=never match \| 2=significant>
499	* Set the way to handle missing values. (default: 0)</pre>
500	*
501	* <pre> -O
502	* Use ROC analysis. </pre>
503	*
504	* <pre> -p <name of file>
505	* Set the file containing the parts of the individual for individual-based learning.</pre>
506	*
507	* <pre> -P <0=no output \| 1=on stdout \| 2=in separate window>
508	* Set output of current values. (default: 0)</pre>
509	*
510	<!-- options-end -->
511	*
512	* @param options The list of options as an array of strings.
513	* @throws Exception if an option is not supported.
514	*/
515	public void setOptions(String [] options) throws Exception {
516
517	resetOptions();
518
519	/* Pruning options. */
520	String bestString = Utils.getOption('K', options);
521	if (bestString.length() != 0) {
522	try {
523	m_best = Integer.parseInt(bestString);
524	} catch (Exception e) {
525	throw new Exception("Invalid value for -K option: "
526	+ e.getMessage() + ".");
527	}
528	if (m_best < 1) {
529	throw new Exception("Number of confirmation values has to be "
530	+ "greater than one!");
531	}
532	}
533	String frequencyThresholdString = Utils.getOption('F', options);
534	if (frequencyThresholdString.length() != 0) {
535	try {
536	m_frequencyThreshold
537	= (new Double(frequencyThresholdString)).doubleValue();
538	} catch (Exception e) {
539	throw new Exception("Invalid value for -F option: "
540	+ e.getMessage() + ".");
541	}
542	if (m_frequencyThreshold < 0 \|\| m_frequencyThreshold > 1) {
543	throw new Exception("Frequency threshold has to be between "
544	+ "zero and one!");
545	}
546	}
547	String confirmationThresholdString = Utils.getOption('C', options);
548	if (confirmationThresholdString.length() != 0) {
549	try {
550	m_confirmationThreshold
551	= (new Double(confirmationThresholdString)).doubleValue();
552	} catch (Exception e) {
553	throw new Exception("Invalid value for -C option: "
554	+ e.getMessage() + ".");
555	}
556	if (m_confirmationThreshold < 0 \|\| m_confirmationThreshold > 1) {
557	throw new Exception("Confirmation threshold has to be between "
558	+ "zero and one!");
559	}
560	if (bestString.length() != 0) {
561	throw new Exception("Specifying both a number of confirmation "
562	+ "values and a confirmation threshold "
563	+ "doesn't make sense!");
564	}
565	if (m_confirmationThreshold != 0) {
566	m_best = 0;
567	}
568	}
569	String noiseThresholdString = Utils.getOption('N', options);
570	if (noiseThresholdString.length() != 0) {
571	try {
572	m_noiseThreshold = (new Double(noiseThresholdString)).doubleValue();
573	} catch (Exception e) {
574	throw new Exception("Invalid value for -N option: "
575	+ e.getMessage() + ".");
576	}
577	if (m_noiseThreshold < 0 \|\| m_noiseThreshold > 1) {
578	throw new Exception("Noise threshold has to be between "
579	+ "zero and one!");
580	}
581	}
582
583	/* Search space and language bias options. */
584	m_repeat = Utils.getFlag('R', options);
585	String numLiteralsString = Utils.getOption('L', options);
586	if (numLiteralsString.length() != 0) {
587	try {
588	m_numLiterals = Integer.parseInt(numLiteralsString);
589	} catch (Exception e) {
590	throw new Exception("Invalid value for -L option: "
591	+ e.getMessage() + ".");
592	}
593	if (m_numLiterals < 1) {
594	throw new Exception("Number of literals has to be "
595	+ "greater than one!");
596	}
597	}
598	String negationString = Utils.getOption('G', options);
599	if (negationString.length() != 0) {
600	SelectedTag selected;
601	int tag;
602	try {
603	tag = Integer.parseInt(negationString);
604	} catch (Exception e) {
605	throw new Exception("Invalid value for -G option: "
606	+ e.getMessage() + ".");
607	}
608	try {
609	selected = new SelectedTag(tag, TAGS_NEGATION);
610	} catch (Exception e) {
611	throw new Exception("Value for -G option has to be "
612	+ "between zero and three!");
613	}
614	setNegation(selected);
615	}
616	m_classification = Utils.getFlag('S', options);
617	String classIndexString = Utils.getOption('c', options);
618	if (classIndexString.length() != 0) {
619	try {
620	m_classIndex = Integer.parseInt(classIndexString);
621	} catch (Exception e) {
622	throw new Exception("Invalid value for -c option: "
623	+ e.getMessage() + ".");
624	}
625	}
626	m_horn = Utils.getFlag('H', options);
627	if (m_horn && (m_negation != NONE)) {
628	throw new Exception("Considering horn clauses doesn't make sense "
629	+ "if negation allowed!");
630	}
631
632	/* Subsumption tests options. */
633	m_equivalent = !(Utils.getFlag('E', options));
634	m_sameClause = !(Utils.getFlag('M', options));
635	m_subsumption = !(Utils.getFlag('T', options));
636
637	/* Missing values options. */
638	String missingString = Utils.getOption('I', options);
639	if (missingString.length() != 0) {
640	SelectedTag selected;
641	int tag;
642	try {
643	tag = Integer.parseInt(missingString);
644	} catch (Exception e) {
645	throw new Exception("Invalid value for -I option: "
646	+ e.getMessage() + ".");
647	}
648	try {
649	selected = new SelectedTag(tag, TAGS_MISSING);
650	} catch (Exception e) {
651	throw new Exception("Value for -I option has to be "
652	+ "between zero and two!");
653	}
654	setMissingValues(selected);
655	}
656
657	/* ROC analysis. */
658	m_roc = Utils.getFlag('O', options);
659
660
661	/* Individual-based learning. */
662	m_partsString = Utils.getOption('p', options);
663	if (m_partsString.length() != 0) {
664	Reader reader;
665	try {
666	reader = new BufferedReader(new FileReader(m_partsString));
667	} catch (Exception e) {
668	throw new Exception("Can't open file " + e.getMessage() + ".");
669	}
670	m_parts = new Instances(reader);
671	}
672
673	/* Values output. */
674	String printValuesString = Utils.getOption('P', options);
675	if (printValuesString.length() != 0) {
676	SelectedTag selected;
677	int tag;
678	try {
679	tag = Integer.parseInt(printValuesString);
680	} catch (Exception e) {
681	throw new Exception("Invalid value for -P option: "
682	+ e.getMessage() + ".");
683	}
684	try {
685	selected = new SelectedTag(tag, TAGS_VALUES);
686	} catch (Exception e) {
687	throw new Exception("Value for -P option has to be "
688	+ "between zero and two!");
689	}
690	setValuesOutput(selected);
691	}
692	}
693
694	/**
695	* Gets the current settings of the Tertius object.
696	*
697	* @return An array of strings suitable for passing to setOptions.
698	*/
699	public String [] getOptions() {
700	Vector result;
701
702	result = new Vector();
703
704	/* Pruning options. */
705	if (m_best > 0) {
706	result.add("-K");
707	result.add("" + m_best);
708	}
709
710	result.add("-F");
711	result.add("" + m_frequencyThreshold);
712
713	if (m_confirmationThreshold > 0) {
714	result.add("-C");
715	result.add("" + m_confirmationThreshold);
716	}
717
718	result.add("-N");
719	result.add("" + m_noiseThreshold);
720
721	/* Search space and language bias options. */
722	if (m_repeat)
723	result.add("-R");
724
725	result.add("-L");
726	result.add("" + m_numLiterals);
727
728	result.add("-G");
729	result.add("" + m_negation);
730
731	if (m_classification)
732	result.add("-S");
733
734	result.add("-c");
735	result.add("" + m_classIndex);
736
737	if (m_horn)
738	result.add("-H");
739
740	/* Subsumption tests options. */
741	if (!m_equivalent)
742	result.add("-E");
743
744	if (!m_sameClause)
745	result.add("-M");
746
747	if (!m_subsumption)
748	result.add("-T");
749
750	/* Missing values options. */
751	result.add("-I");
752	result.add("" + m_missing);
753
754	/* ROC analysis. */
755	if (m_roc)
756	result.add("-O");
757
758	/* Individual-based learning. */
759	if (m_partsString.length() > 0) {
760	result.add("-p");
761	result.add("" + m_partsString);
762	}
763
764	/* Values output. */
765	result.add("-P");
766	result.add("" + m_printValues);
767
768	return (String[]) result.toArray(new String[result.size()]);
769	}
770
771	/**
772	* Returns the tip text for this property.
773	*
774	* @return Tip text for this property suitable for
775	* displaying in the explorer/experimenter GUI.
776	*/
777	public String confirmationValuesTipText() {
778
779	return "Number of best confirmation values to find.";
780	}
781
782	/**
783	* Get the value of confirmationValues.
784	*
785	* @return Value of confirmationValues.
786	*/
787	public int getConfirmationValues() {
788
789	return m_best;
790	}
791
792	/**
793	* Set the value of confirmationValues.
794	*
795	* @param v Value to assign to confirmationValues.
796	*/
797	public void setConfirmationValues(int v) {
798
799	m_best = v;
800	}
801
802	/**
803	* Returns the tip text for this property.
804	*
805	* @return Tip text for this property suitable for
806	* displaying in the explorer/experimenter GUI.
807	*/
808	public String frequencyThresholdTipText() {
809
810	return "Minimum proportion of instances satisfying head and body of rules";
811	}
812
813	/**
814	* Get the value of frequencyThreshold.
815	*
816	* @return Value of frequencyThreshold.
817	*/
818	public double getFrequencyThreshold() {
819
820	return m_frequencyThreshold;
821	}
822
823	/**
824	* Set the value of frequencyThreshold.
825	*
826	* @param v Value to assign to frequencyThreshold.
827	*/
828	public void setFrequencyThreshold(double v) {
829
830	m_frequencyThreshold = v;
831	}
832
833	/**
834	* Returns the tip text for this property.
835	*
836	* @return Tip text for this property suitable for
837	* displaying in the explorer/experimenter GUI.
838	*/
839	public String confirmationThresholdTipText() {
840
841	return "Minimum confirmation of the rules.";
842	}
843
844	/**
845	* Get the value of confirmationThreshold.
846	*
847	* @return Value of confirmationThreshold.
848	*/
849	public double getConfirmationThreshold() {
850
851	return m_confirmationThreshold;
852	}
853
854	/**
855	* Set the value of confirmationThreshold.
856	*
857	* @param v Value to assign to confirmationThreshold.
858	*/
859	public void setConfirmationThreshold(double v) {
860
861	m_confirmationThreshold = v;
862	if (v != 0) {
863	m_best = 0;
864	}
865	}
866
867	/**
868	* Returns the tip text for this property.
869	*
870	* @return Tip text for this property suitable for
871	* displaying in the explorer/experimenter GUI.
872	*/
873	public String noiseThresholdTipText() {
874
875	return "Maximum proportion of counter-instances of rules. "
876	+ "If set to 0, only satisfied rules will be given.";
877	}
878
879	/**
880	* Get the value of noiseThreshold.
881	*
882	* @return Value of noiseThreshold.
883	*/
884	public double getNoiseThreshold() {
885
886	return m_noiseThreshold;
887	}
888
889	/**
890	* Set the value of noiseThreshold.
891	*
892	* @param v Value to assign to noiseThreshold.
893	*/
894	public void setNoiseThreshold(double v) {
895
896	m_noiseThreshold = v;
897	}
898
899	/**
900	* Returns the tip text for this property.
901	*
902	* @return Tip text for this property suitable for
903	* displaying in the explorer/experimenter GUI.
904	*/
905	public String repeatLiteralsTipText() {
906
907	return "Repeated attributes allowed.";
908	}
909
910	/**
911	* Get the value of repeatLiterals.
912	*
913	* @return Value of repeatLiterals.
914	*/
915	public boolean getRepeatLiterals() {
916
917	return m_repeat;
918	}
919
920	/**
921	* Set the value of repeatLiterals.
922	*
923	* @param v Value to assign to repeatLiterals.
924	*/
925	public void setRepeatLiterals(boolean v) {
926
927	m_repeat = v;
928	}
929
930	/**
931	* Returns the tip text for this property.
932	*
933	* @return Tip text for this property suitable for
934	* displaying in the explorer/experimenter GUI.
935	*/
936	public String numberLiteralsTipText() {
937
938	return "Maximum number of literals in a rule.";
939	}
940
941	/**
942	* Get the value of numberLiterals.
943	*
944	* @return Value of numberLiterals.
945	*/
946	public int getNumberLiterals() {
947
948	return m_numLiterals;
949	}
950
951	/**
952	* Set the value of numberLiterals.
953	*
954	* @param v Value to assign to numberLiterals.
955	*/
956	public void setNumberLiterals(int v) {
957
958	m_numLiterals = v;
959	}
960
961	/**
962	* Returns the tip text for this property.
963	*
964	* @return Tip text for this property suitable for
965	* displaying in the explorer/experimenter GUI.
966	*/
967	public String negationTipText() {
968
969	return "Set the type of negation allowed in the rule. "
970	+ "Negation can be allowed in the body, in the head, in both "
971	+ "or in none.";
972	}
973
974	/**
975	* Get the value of negation.
976	*
977	* @return Value of negation.
978	*/
979	public SelectedTag getNegation() {
980
981	return new SelectedTag(m_negation, TAGS_NEGATION);
982	}
983
984	/**
985	* Set the value of negation.
986	*
987	* @param v Value to assign to negation.
988	*/
989	public void setNegation(SelectedTag v) {
990
991	if (v.getTags() == TAGS_NEGATION) {
992	m_negation = v.getSelectedTag().getID();
993	}
994	}
995
996	/**
997	* Returns the tip text for this property.
998	*
999	* @return Tip text for this property suitable for
1000	* displaying in the explorer/experimenter GUI.
1001	*/
1002	public String classificationTipText() {
1003
1004	return "Find only rules with the class in the head.";
1005	}
1006
1007	/**
1008	* Get the value of classification.
1009	*
1010	* @return Value of classification.
1011	*/
1012	public boolean getClassification() {
1013
1014	return m_classification;
1015	}
1016
1017	/**
1018	* Set the value of classification.
1019	*
1020	* @param v Value to assign to classification.
1021	*/
1022	public void setClassification(boolean v) {
1023
1024	m_classification = v;
1025	}
1026
1027	/**
1028	* Returns the tip text for this property.
1029	*
1030	* @return Tip text for this property suitable for
1031	* displaying in the explorer/experimenter GUI.
1032	*/
1033	public String classIndexTipText() {
1034
1035	return "Index of the class attribute. If set to 0, the class will be the last attribute.";
1036	}
1037
1038	/**
1039	* Get the value of classIndex.
1040	*
1041	* @return Value of classIndex.
1042	*/
1043	public int getClassIndex() {
1044
1045	return m_classIndex;
1046	}
1047
1048	/**
1049	* Set the value of classIndex.
1050	*
1051	* @param v Value to assign to classIndex.
1052	*/
1053	public void setClassIndex(int v) {
1054
1055	m_classIndex = v;
1056	}
1057
1058	/**
1059	* Returns the tip text for this property.
1060	*
1061	* @return Tip text for this property suitable for
1062	* displaying in the explorer/experimenter GUI.
1063	*/
1064	public String hornClausesTipText() {
1065
1066	return "Find rules with a single conclusion literal only.";
1067	}
1068
1069	/**
1070	* Get the value of hornClauses.
1071	*
1072	* @return Value of hornClauses.
1073	*/
1074	public boolean getHornClauses() {
1075
1076	return m_horn;
1077	}
1078
1079	/**
1080	* Set the value of hornClauses.
1081	*
1082	* @param v Value to assign to hornClauses.
1083	*/
1084	public void setHornClauses(boolean v) {
1085
1086	m_horn = v;
1087	}
1088
1089	/**
1090	* Returns the tip text for this property.
1091	*
1092	* @return Tip text for this property suitable for
1093	* displaying in the explorer/experimenter GUI.
1094	*/
1095	public String equivalentTipText() {
1096
1097	return "Keep equivalent rules. "
1098	+ "A rule r2 is equivalent to a rule r1 if the body of r2 is the "
1099	+ "negation of the head of r1, and the head of r2 is the "
1100	+ "negation of the body of r1.";
1101	}
1102
1103	/**
1104	* Get the value of equivalent.
1105	*
1106	* @return Value of equivalent.
1107	*/
1108	public boolean disabled_getEquivalent() {
1109
1110	return !m_equivalent;
1111	}
1112
1113	/**
1114	* Set the value of equivalent.
1115	*
1116	* @param v Value to assign to equivalent.
1117	*/
1118	public void disabled_setEquivalent(boolean v) {
1119
1120	m_equivalent = !v;
1121	}
1122
1123	/**
1124	* Returns the tip text for this property.
1125	*
1126	* @return Tip text for this property suitable for
1127	* displaying in the explorer/experimenter GUI.
1128	*/
1129	public String sameClauseTipText() {
1130
1131	return "Keep rules corresponding to the same clauses. "
1132	+ "If set to false, only the rule with the best confirmation "
1133	+ "value and rules with a lower number of counter-instances "
1134	+ "will be kept.";
1135	}
1136
1137	/**
1138	* Get the value of sameClause.
1139	*
1140	* @return Value of sameClause.
1141	*/
1142	public boolean disabled_getSameClause() {
1143
1144	return !m_sameClause;
1145	}
1146
1147	/**
1148	* Set the value of sameClause.
1149	*
1150	* @param v Value to assign to sameClause.
1151	*/
1152	public void disabled_setSameClause(boolean v) {
1153
1154	m_sameClause = !v;
1155	}
1156
1157	/**
1158	* Returns the tip text for this property.
1159	*
1160	* @return Tip text for this property suitable for
1161	* displaying in the explorer/experimenter GUI.
1162	*/
1163	public String subsumptionTipText() {
1164
1165	return "Keep subsumed rules. "
1166	+ "If set to false, subsumed rules will only be kept if they "
1167	+ "have a better confirmation or a lower number of counter-instances.";
1168	}
1169
1170	/**
1171	* Get the value of subsumption.
1172	*
1173	* @return Value of subsumption.
1174	*/
1175	public boolean disabled_getSubsumption() {
1176
1177	return !m_subsumption;
1178	}
1179
1180	/**
1181	* Set the value of subsumption.
1182	*
1183	* @param v Value to assign to subsumption.
1184	*/
1185	public void disabled_setSubsumption(boolean v) {
1186
1187	m_subsumption = !v;
1188	}
1189
1190	/**
1191	* Returns the tip text for this property.
1192	*
1193	* @return Tip text for this property suitable for
1194	* displaying in the explorer/experimenter GUI.
1195	*/
1196	public String missingValuesTipText() {
1197
1198	return "Set the way to handle missing values. "
1199	+ "Missing values can be set to match any value, or never match values "
1200	+ "or to be significant and possibly appear in rules.";
1201	}
1202
1203	/**
1204	* Get the value of missingValues.
1205	*
1206	* @return Value of missingValues.
1207	*/
1208	public SelectedTag getMissingValues() {
1209
1210	return new SelectedTag(m_missing, TAGS_MISSING);
1211	}
1212
1213	/**
1214	* Set the value of missingValues.
1215	*
1216	* @param v Value to assign to missingValues.
1217	*/
1218	public void setMissingValues(SelectedTag v) {
1219
1220	if (v.getTags() == TAGS_MISSING) {
1221	m_missing = v.getSelectedTag().getID();
1222	}
1223	}
1224
1225	/**
1226	* Returns the tip text for this property.
1227	*
1228	* @return Tip text for this property suitable for
1229	* displaying in the explorer/experimenter GUI.
1230	*/
1231	public String rocAnalysisTipText() {
1232
1233	return "Return TP-rate and FP-rate for each rule found.";
1234	}
1235
1236	/**
1237	* Get the value of rocAnalysis.
1238	*
1239	* @return Value of rocAnalysis.
1240	*/
1241	public boolean getRocAnalysis() {
1242
1243	return m_roc;
1244	}
1245
1246	/**
1247	* Set the value of rocAnalysis.
1248	*
1249	* @param v Value to assign to rocAnalysis.
1250	*/
1251	public void setRocAnalysis(boolean v) {
1252
1253	m_roc = v;
1254	}
1255
1256	/**
1257	* Returns the tip text for this property.
1258	*
1259	* @return Tip text for this property suitable for
1260	* displaying in the explorer/experimenter GUI.
1261	*/
1262	public String partFileTipText() {
1263
1264	return "Set file containing the parts of the individual "
1265	+ "for individual-based learning.";
1266	}
1267
1268	/**
1269	* Get the value of partFile.
1270	*
1271	* @return Value of partFile.
1272	*/
1273	public File disabled_getPartFile() {
1274
1275	return new File(m_partsString);
1276	}
1277
1278	/**
1279	* Set the value of partFile.
1280	*
1281	* @param v Value to assign to partFile.
1282	* @throws Exception if file cannot be opened
1283	*/
1284	public void disabled_setPartFile(File v) throws Exception {
1285
1286	m_partsString = v.getAbsolutePath();
1287	if (m_partsString.length() != 0) {
1288	Reader reader;
1289	try {
1290	reader = new BufferedReader(new FileReader(m_partsString));
1291	} catch (Exception e) {
1292	throw new Exception("Can't open file " + e.getMessage() + ".");
1293	}
1294	m_parts = new Instances(reader);
1295	}
1296	}
1297
1298	/**
1299	* Returns the tip text for this property.
1300	*
1301	* @return Tip text for this property suitable for
1302	* displaying in the explorer/experimenter GUI.
1303	*/
1304	public String valuesOutputTipText() {
1305
1306	return "Give visual feedback during the search. "
1307	+ "The current best and worst values can be output either to stdout or to a separate window.";
1308	}
1309
1310	/**
1311	* Get the value of valuesOutput.
1312	*
1313	* @return Value of valuesOutput.
1314	*/
1315	public SelectedTag getValuesOutput() {
1316
1317	return new SelectedTag(m_printValues, TAGS_VALUES);
1318	}
1319
1320	/**
1321	* Set the value of valuesOutput.
1322	*
1323	* @param v Value to assign to valuesOutput.
1324	*/
1325	public void setValuesOutput(SelectedTag v) {
1326
1327	if (v.getTags() == TAGS_VALUES) {
1328	m_printValues = v.getSelectedTag().getID();
1329	}
1330	}
1331
1332	/**
1333	* Build the predicate corresponding to an attribute.
1334	*
1335	* @param instances The instances this predicates comes from.
1336	* @param attr The attribute this predicate corresponds to.
1337	* @param isClass Saying if the attribute is the class attribute.
1338	* @return The corresponding Predicate.
1339	* @throws Exception if the predicate could not be build
1340	* (the attribute is numeric).
1341	*/
1342	private Predicate buildPredicate(Instances instances,
1343	Attribute attr, boolean isClass)
1344	throws Exception {
1345
1346	Predicate predicate; /* The result. */
1347	Literal lit;
1348	Literal negation;
1349	boolean missingValues; /* Missing values for this attribute ? */
1350	boolean individual = (m_parts != null); /* Individual-based learning ? */
1351	int type = (instances == m_parts)
1352	? IndividualLiteral.PART_PROPERTY
1353	: IndividualLiteral.INDIVIDUAL_PROPERTY; /* Type of property. */
1354
1355	if (attr.isNumeric()) {
1356	throw new Exception("Can't handle numeric attributes!");
1357	}
1358
1359	missingValues = instances.attributeStats(attr.index()).missingCount > 0;
1360
1361	/* Build predicate. */
1362	if (individual) {
1363	predicate = new Predicate(instances.relationName() + "." + attr.name(),
1364	attr.index(), isClass);
1365	} else {
1366	predicate = new Predicate(attr.name(), attr.index(), isClass);
1367	}
1368
1369	if (attr.numValues() == 2
1370	&& (!missingValues \|\| m_missing == EXPLICIT)) {
1371	/* Case of two values.
1372	* If there are missing values, this case is treated like other cases.
1373	*/
1374	if (individual) {
1375	lit = new IndividualLiteral(predicate, attr.value(0), 0,
1376	Literal.POS, m_missing, type);
1377	negation = new IndividualLiteral(predicate, attr.value(1), 1,
1378	Literal.POS, m_missing, type);
1379	} else {
1380	lit = new AttributeValueLiteral(predicate, attr.value(0), 0,
1381	Literal.POS, m_missing);
1382	negation = new AttributeValueLiteral(predicate, attr.value(1), 1,
1383	Literal.POS, m_missing);
1384	}
1385	lit.setNegation(negation);
1386	negation.setNegation(lit);
1387	predicate.addLiteral(lit);
1388	} else {
1389	/* Case of several values. */
1390	for (int i = 0; i < attr.numValues(); i++) {
1391	if (individual) {
1392	lit = new IndividualLiteral(predicate, attr.value(i), i,
1393	Literal.POS, m_missing, type);
1394	} else {
1395	lit = new AttributeValueLiteral(predicate, attr.value(i), i,
1396	Literal.POS, m_missing);
1397	}
1398	if (m_negation != NONE) {
1399	if (individual) {
1400	negation = new IndividualLiteral(predicate, attr.value(i), i,
1401	Literal.NEG, m_missing, type);
1402	} else {
1403	negation = new AttributeValueLiteral(predicate, attr.value(i), i,
1404	Literal.NEG, m_missing);
1405	}
1406	lit.setNegation(negation);
1407	negation.setNegation(lit);
1408	}
1409	predicate.addLiteral(lit);
1410	}
1411
1412	/* One more value if missing is significant. */
1413	if (missingValues && m_missing == SIGNIFICANT) {
1414	if (individual) {
1415	lit = new IndividualLiteral(predicate, "?", -1,
1416	Literal.POS, m_missing, type);
1417	} else {
1418	lit = new AttributeValueLiteral(predicate, "?", -1,
1419	Literal.POS, m_missing);
1420	}
1421	if (m_negation != NONE) {
1422	if (individual) {
1423	negation = new IndividualLiteral(predicate, "?", -1,
1424	Literal.NEG, m_missing, type);
1425	} else {
1426	negation = new AttributeValueLiteral(predicate, "?", -1,
1427	Literal.NEG, m_missing);
1428	}
1429	lit.setNegation(negation);
1430	negation.setNegation(lit);
1431	}
1432	predicate.addLiteral(lit);
1433	}
1434	}
1435	return predicate;
1436	}
1437
1438	/**
1439	* Build the predicates to use in the rules.
1440	*
1441	* @return the predicates
1442	* @throws Exception If the predicates could not be built
1443	* (numeric attribute).
1444	*/
1445	private ArrayList buildPredicates() throws Exception {
1446
1447	ArrayList predicates = new ArrayList(); /* The result. */
1448	Predicate predicate;
1449	Attribute attr;
1450	Enumeration attributes = m_instances.enumerateAttributes();
1451	boolean individual = (m_parts != null); /* Individual-based learning ? */
1452
1453	/* Attributes. */
1454	while (attributes.hasMoreElements()) {
1455	attr = (Attribute) attributes.nextElement();
1456	/* Identifiers do not appear in rules in individual-based learning. */
1457	if (!(individual && attr.name().equals("id"))) {
1458	predicate = buildPredicate(m_instances, attr, false);
1459	predicates.add(predicate);
1460	}
1461	}
1462	/* Class attribute. */
1463	attr = m_instances.classAttribute();
1464	/* Identifiers do not appear in rules. */
1465	if (!(individual && attr.name().equals("id"))) {
1466	predicate = buildPredicate(m_instances, attr, true);
1467	predicates.add(predicate);
1468	}
1469
1470	/* Attributes of the parts in individual-based learning. */
1471	if (individual) {
1472	attributes = m_parts.enumerateAttributes();
1473	while (attributes.hasMoreElements()) {
1474	attr = (Attribute) attributes.nextElement();
1475	/* Identifiers do not appear in rules. */
1476	if (!attr.name().equals("id")) {
1477	predicate = buildPredicate(m_parts, attr, false);
1478	predicates.add(predicate);
1479	}
1480	}
1481	}
1482
1483	return predicates;
1484	}
1485
1486	/**
1487	* Count the number of distinct confirmation values in the results.
1488	*
1489	* @return Number of confirmation values in the results.
1490	*/
1491	private int numValuesInResult() {
1492
1493	int result = 0;
1494	SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1495	Rule current;
1496	Rule next;
1497	if (!iter.hasNext()) {
1498	return result;
1499	} else {
1500	current = (Rule) iter.next();
1501	while (iter.hasNext()) {
1502	next = (Rule) iter.next();
1503	if (current.getConfirmation() > next.getConfirmation()) {
1504	result++;
1505	}
1506	current = next;
1507	}
1508	return result + 1;
1509	}
1510	}
1511
1512	/**
1513	* Test if it is worth refining a rule.
1514	*
1515	* @param rule The rule to consider.
1516	* @return True if the rule can be refined, false otherwise.
1517	*/
1518	private boolean canRefine(Rule rule) {
1519	if (rule.isEmpty()) {
1520	return true;
1521	}
1522	if (m_best != 0) {
1523	if (numValuesInResult() < m_best) {
1524	return true;
1525	}
1526	Rule worstResult = (Rule) m_results.getLast();
1527	if (rule.getOptimistic() >= worstResult.getConfirmation()) {
1528	return true;
1529	}
1530	return false;
1531	} else {
1532	return true;
1533	}
1534	}
1535
1536	/**
1537	* Test if it is worth calculating the optimistic estimate of a rule.
1538	*
1539	* @param rule The rule to consider.
1540	* @return True if the optimistic estimate can be calculated, false otherwise.
1541	*/
1542	private boolean canCalculateOptimistic(Rule rule) {
1543	if (rule.hasTrueBody() \|\| rule.hasFalseHead()) {
1544	return false;
1545	}
1546	if (!rule.overFrequencyThreshold(m_frequencyThreshold)) {
1547	return false;
1548	}
1549	return true;
1550	}
1551
1552	/**
1553	* Test if a rule can be explored (if it is interesting for the results
1554	* or for refining).
1555	*
1556	* @param rule The rule to consider.
1557	* @return True if the rule can be explored, false otherwise.
1558	*/
1559	private boolean canExplore(Rule rule) {
1560	if (rule.getOptimistic() < m_confirmationThreshold) {
1561	return false;
1562	}
1563	if (m_best != 0) {
1564	if (numValuesInResult() < m_best) {
1565	return true;
1566	}
1567	Rule worstResult = (Rule) m_results.getLast();
1568	if (rule.getOptimistic() >= worstResult.getConfirmation()) {
1569	return true;
1570	}
1571	return false;
1572	} else {
1573	return true;
1574	}
1575	}
1576
1577	/**
1578	* Test if a rule can be stored in the agenda.
1579	*
1580	* @param rule The rule to consider.
1581	* @return True if the rule can be stored, false otherwise.
1582	*/
1583	private boolean canStoreInNodes(Rule rule) {
1584	if (rule.getObservedNumber() == 0) {
1585	return false;
1586	}
1587	return true;
1588	}
1589
1590	/**
1591	* Test if it is worth calculating the confirmation of a rule.
1592	*
1593	* @param rule The rule to consider.
1594	* @return True if the confirmation can be calculated, false otherwise.
1595	*/
1596	private boolean canCalculateConfirmation(Rule rule) {
1597	if (rule.getObservedFrequency() > m_noiseThreshold) {
1598	return false;
1599	}
1600	return true;
1601	}
1602
1603	/**
1604	* Test if a rule can be added to the results.
1605	*
1606	* @param rule The rule to consider.
1607	* @return True if the rule can be stored, false otherwise.
1608	*/
1609	private boolean canStoreInResults(Rule rule) {
1610	if (rule.getConfirmation() < m_confirmationThreshold) {
1611	return false;
1612	}
1613	if (m_best != 0) {
1614	if (numValuesInResult() < m_best) {
1615	return true;
1616	}
1617	Rule worstResult = (Rule) m_results.getLast();
1618	if (rule.getConfirmation() >= worstResult.getConfirmation()) {
1619	return true;
1620	}
1621	return false;
1622	} else {
1623	return true;
1624	}
1625	}
1626
1627	/**
1628	* Add a rule in the appropriate place in the list of the results,
1629	* according to the confirmation and
1630	* number of counter-instances of the rule. <p>
1631	* Subsumption tests are performed and the rule may not be added. <p>
1632	* Previous results may also be removed because of sumption.
1633	*/
1634	private void addResult(Rule rule) {
1635
1636	Rule current;
1637	boolean added = false;
1638
1639	/* Iterate the list until we find the right place. */
1640	SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1641	while (iter.hasNext()) {
1642	current = (Rule) iter.next();
1643	if (Rule.confirmationThenObservedComparator.compare(current, rule) > 0) {
1644	iter.addBefore(rule);
1645	added = true;
1646	break;
1647	}
1648	/* Subsumption tests to see if the rule can be added. */
1649	if ((m_subsumption \|\| m_sameClause \|\| m_equivalent)
1650	&& current.subsumes(rule)) {
1651	if (current.numLiterals() == rule.numLiterals()) {
1652	if (current.equivalentTo(rule)) {
1653	/* Equivalent rules. */
1654	if (m_equivalent) {
1655	return;
1656	}
1657	} else {
1658	/* Same clauses. */
1659	if (m_sameClause
1660	&& Rule.confirmationComparator.compare(current, rule) < 0) {
1661	return;
1662	}
1663	}
1664	} else {
1665	/* Subsumption. */
1666	if (m_subsumption
1667	&& Rule.observedComparator.compare(current, rule) <= 0) {
1668	return;
1669	}
1670	}
1671	}
1672	}
1673
1674	if (added == false) {
1675	/* The rule must be added in the end of the results. */
1676	m_results.add(rule);
1677	}
1678
1679	/* Iterate the results with a lower confirmation
1680	* to see if some of them must be removed. */
1681	SimpleLinkedList.LinkedListInverseIterator inverse
1682	= m_results.inverseIterator();
1683	while (inverse.hasPrevious()) {
1684	current = (Rule) inverse.previous();
1685	if (Rule.confirmationThenObservedComparator.compare(current, rule) < 0) {
1686	break;
1687	}
1688	if (current != rule && rule.subsumes(current)) {
1689	if (current.numLiterals() == rule.numLiterals()) {
1690	if (!current.equivalentTo(rule)) {
1691	/* Same clauses. */
1692	if (m_sameClause
1693	&& Rule.confirmationComparator.compare(current, rule) > 0) {
1694	inverse.remove();
1695	}
1696	}
1697	} else {
1698	/* Subsumption. */
1699	if (m_subsumption
1700	&& Rule.observedComparator.compare(rule, current) <= 0) {
1701	inverse.remove();
1702	}
1703	}
1704	}
1705	}
1706
1707	/* Remove the rules with the worst confirmation value
1708	* if there are too many results. */
1709	if (m_best != 0 && numValuesInResult() > m_best) {
1710	Rule worstRule = (Rule) m_results.getLast();
1711	inverse = m_results.inverseIterator();
1712	while (inverse.hasPrevious()) {
1713	current = (Rule) inverse.previous();
1714	if (Rule.confirmationComparator.compare(current, worstRule) < 0) {
1715	break;
1716	}
1717	inverse.remove();
1718	}
1719	}
1720
1721	/* Print the new current values. */
1722	printValues();
1723	}
1724
1725	/**
1726	* Returns default capabilities of the classifier.
1727	*
1728	* @return the capabilities of this classifier
1729	*/
1730	public Capabilities getCapabilities() {
1731	Capabilities result = super.getCapabilities();
1732	result.disableAll();
1733
1734	// attributes
1735	result.enable(Capability.NOMINAL_ATTRIBUTES);
1736	result.enable(Capability.MISSING_VALUES);
1737
1738	// class
1739	result.enable(Capability.NOMINAL_CLASS);
1740	result.enable(Capability.MISSING_CLASS_VALUES);
1741
1742	return result;
1743	}
1744
1745	/**
1746	* Method that launches the search to find the rules with the highest
1747	* confirmation.
1748	*
1749	* @param instances The instances to be used for generating the rules.
1750	* @throws Exception if rules can't be built successfully.
1751	*/
1752	public void buildAssociations(Instances instances) throws Exception {
1753
1754	Frame valuesFrame = null; /* Frame to display the current values. */
1755
1756	/* Initialization of the search. */
1757	if (m_parts == null) {
1758	m_instances = new Instances(instances);
1759	} else {
1760	m_instances = new IndividualInstances(new Instances(instances), m_parts);
1761	}
1762	m_results = new SimpleLinkedList();
1763	m_hypotheses = 0;
1764	m_explored = 0;
1765	m_status = NORMAL;
1766
1767	if (m_classIndex == -1)
1768	m_instances.setClassIndex(m_instances.numAttributes()-1);
1769	else if (m_classIndex < m_instances.numAttributes() && m_classIndex >= 0)
1770	m_instances.setClassIndex(m_classIndex);
1771	else
1772	throw new Exception("Invalid class index.");
1773
1774	// can associator handle the data?
1775	getCapabilities().testWithFail(m_instances);
1776
1777	/* Initialization of the window for current values. */
1778	if (m_printValues == WINDOW) {
1779	m_valuesText = new TextField(37);
1780	m_valuesText.setEditable(false);
1781	m_valuesText.setFont(new Font("Monospaced", Font.PLAIN, 12));
1782	Label valuesLabel = new Label("Best and worst current values:");
1783	Button stop = new Button("Stop search");
1784	stop.addActionListener(new ActionListener() {
1785	public void actionPerformed(ActionEvent e) {
1786	/* Signal the interruption to the search. */
1787	m_status = STOP;
1788	}
1789	});
1790	valuesFrame = new Frame("Tertius status");
1791	valuesFrame.setResizable(false);
1792	valuesFrame.add(m_valuesText, BorderLayout.CENTER);
1793	valuesFrame.add(stop, BorderLayout.SOUTH);
1794	valuesFrame.add(valuesLabel, BorderLayout.NORTH);
1795	valuesFrame.pack();
1796	valuesFrame.setVisible(true);
1797	} else if (m_printValues == OUT) {
1798	System.out.println("Best and worst current values:");
1799	}
1800
1801	Date start = new Date();
1802
1803	/* Build the predicates and launch the search. */
1804	m_predicates = buildPredicates();
1805	beginSearch();
1806
1807	Date end = new Date();
1808
1809	if (m_printValues == WINDOW) {
1810	valuesFrame.dispose();
1811	}
1812
1813	m_time = new Date(end.getTime() - start.getTime());
1814	}
1815
1816	/**
1817	* Run the search.
1818	*/
1819	public void run() {
1820	try {
1821	search();
1822	} catch (OutOfMemoryError e) {
1823	/* Garbage collect what can be collected to be able to continue. */
1824	System.gc();
1825	m_status = MEMORY;
1826	}
1827	endSearch();
1828	}
1829
1830	/**
1831	* Begin the search by starting a new thread.
1832	*/
1833	private synchronized void beginSearch() throws Exception {
1834	/* This method must be synchronized to be able to
1835	* call the wait() method. */
1836	Thread search = new Thread(this);
1837	search.start();
1838	try {
1839	/* Wait for the end of the thread. */
1840	wait();
1841	} catch (InterruptedException e) {
1842	/* Signal the interruption to the search. */
1843	m_status = STOP;
1844	}
1845	}
1846
1847	/**
1848	* End the search by notifying to the waiting thread that it is finished.
1849	*/
1850	private synchronized void endSearch() {
1851	/* This method must be synchronized to be able to
1852	* call the notify() method. */
1853	notify();
1854	}
1855
1856	/**
1857	* Search in the space of hypotheses the rules that have the highest
1858	* confirmation.
1859	* The search starts with the empty rule, other rules are generated by
1860	* refinement.
1861	*/
1862	public void search() {
1863
1864	SimpleLinkedList nodes = new SimpleLinkedList(); /* The agenda. */
1865	Rule currentNode;
1866	SimpleLinkedList children;
1867	SimpleLinkedList.LinkedListIterator iter;
1868	Rule child;
1869	boolean negBody = (m_negation == BODY \|\| m_negation == ALL);
1870	boolean negHead = (m_negation == HEAD \|\| m_negation == ALL);
1871
1872	/* Start with the empty rule. */
1873	nodes.add(new Rule(m_repeat, m_numLiterals, negBody, negHead,
1874	m_classification, m_horn));
1875
1876	/* Print the current values. */
1877	printValues();
1878
1879	/* Explore the rules in the agenda. */
1880	while (m_status != STOP && !nodes.isEmpty()) {
1881	currentNode = (Rule) nodes.removeFirst();
1882	if (canRefine(currentNode)) {
1883	children = currentNode.refine(m_predicates);
1884	iter = children.iterator();
1885	/* Calculate the optimistic estimate of the children and
1886	* consider them for adding to the agenda and to the results. */
1887	while (iter.hasNext()) {
1888	m_hypotheses++;
1889	child = (Rule) iter.next();
1890	child.upDate(m_instances);
1891	if (canCalculateOptimistic(child)) {
1892	child.calculateOptimistic();
1893	if (canExplore(child)) {
1894	m_explored++;
1895	if (canStoreInNodes(child)) {
1896	} else {
1897	iter.remove();
1898	}
1899	if (canCalculateConfirmation(child)) {
1900	child.calculateConfirmation();
1901	if (canStoreInResults(child)) {
1902	addResult(child);
1903	}
1904	}
1905	} else {
1906	iter.remove();
1907	}
1908	} else {
1909	iter.remove();
1910	}
1911	}
1912	/* Since the agenda is already sorted it is more efficient
1913	* to sort the children only and then merge. */
1914	children.sort(Rule.optimisticThenObservedComparator);
1915	nodes.merge(children, Rule.optimisticThenObservedComparator);
1916	} else {
1917	/* The agenda being sorted, it is not worth considering the following
1918	* nodes. */
1919	break;
1920	}
1921	}
1922	}
1923
1924	/**
1925	* returns the results
1926	*
1927	* @return the results
1928	*/
1929	public SimpleLinkedList getResults() {
1930	return m_results;
1931	}
1932
1933	/**
1934	* Print the current best and worst values.
1935	*/
1936	private void printValues() {
1937
1938	if (m_printValues == NO) {
1939	return;
1940	} else {
1941	if (m_results.isEmpty()) {
1942	if (m_printValues == OUT) {
1943	System.out.print("0.000000 0.000000 - 0.000000 0.000000");
1944	} else { //m_printValues == WINDOW
1945	m_valuesText.setText("0.000000 0.000000 - 0.000000 0.000000");
1946	}
1947	} else {
1948	Rule best = (Rule) m_results.getFirst();
1949	Rule worst = (Rule) m_results.getLast();
1950	String values = best.valuesToString() + " - " + worst.valuesToString();
1951	if (m_printValues == OUT) {
1952	System.out.print("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
1953	+ "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
1954	System.out.print(values);
1955	} else { //m_printValues == WINDOW
1956	m_valuesText.setText(values);
1957	}
1958	}
1959	}
1960	}
1961
1962	/**
1963	* Outputs the best rules found with their confirmation value and number
1964	* of counter-instances.
1965	* Also gives the number of hypotheses considered and explored, and the
1966	* time needed.
1967	*/
1968	public String toString() {
1969
1970	StringBuffer text = new StringBuffer();
1971	SimpleLinkedList.LinkedListIterator iter = m_results.iterator();
1972	int size = m_results.size();
1973	int i = 0;
1974
1975	text.append("\nTertius\n=======\n\n");
1976
1977	while (iter.hasNext()) {
1978	Rule current = (Rule) iter.next();
1979	text.append(Utils.doubleToString((double) i + 1,
1980	(int) (Math.log(size)
1981	/ Math.log(10) + 1),
1982	0)
1983	+ ". ");
1984	text.append("/* ");
1985	if (m_roc) {
1986	text.append(current.rocToString());
1987	} else {
1988	text.append(current.valuesToString());
1989	}
1990	text.append(" */ ");
1991	text.append(current.toString());
1992	text.append("\n");
1993	i++;
1994	}
1995
1996	text.append("\nNumber of hypotheses considered: " + m_hypotheses);
1997	text.append("\nNumber of hypotheses explored: " + m_explored);
1998
1999	if (m_status == MEMORY) {
2000	text.append("\n\nNot enough memory to continue the search");
2001	} else if (m_status == STOP) {
2002	text.append("\n\nSearch interrupted");
2003	}
2004
2005	return text.toString();
2006	}
2007
2008	/**
2009	* Returns the revision string.
2010	*
2011	* @return the revision
2012	*/
2013	public String getRevision() {
2014	return RevisionUtils.extract("$Revision: 5444 $");
2015	}
2016
2017	/**
2018	* Main method.
2019	*
2020	* @param args the commandline parameters
2021	*/
2022	public static void main(String [] args) {
2023	runAssociator(new Tertius(), args);
2024	}
2025	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: