Context Navigation

source: src/main/java/weka/classifiers/rules/FURIA.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 85.4 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* FURIA.java
19	* Copyright (C) 2008,2009 Jens Christian Huehn
20	*
21	* (based upon) JRip.java
22	* Copyright (C) 2001 Xin Xu, Eibe Frank
23	*/
24
25	package weka.classifiers.rules;
26
27	import java.io.Serializable;
28	import java.util.Enumeration;
29	import java.util.Random;
30	import java.util.Vector;
31
32	import weka.classifiers.AbstractClassifier;
33	import weka.core.AdditionalMeasureProducer;
34	import weka.core.Attribute;
35	import weka.core.Capabilities;
36	import weka.core.Copyable;
37	import weka.core.FastVector;
38	import weka.core.Instance;
39	import weka.core.Instances;
40	import weka.core.Option;
41	import weka.core.OptionHandler;
42	import weka.core.SelectedTag;
43	import weka.core.Tag;
44	import weka.core.TechnicalInformation;
45	import weka.core.TechnicalInformationHandler;
46	import weka.core.Utils;
47	import weka.core.WeightedInstancesHandler;
48	import weka.core.Capabilities.Capability;
49	import weka.core.TechnicalInformation.Field;
50	import weka.core.TechnicalInformation.Type;
51
52	/**
53	<!-- globalinfo-start -->
54	* FURIA: Fuzzy Unordered Rule Induction Algorithm<br/>
55	* <br/>
56	* Details please see:<br/>
57	* <br/>
58	* Jens Christian Huehn, Eyke Huellermeier (2009). FURIA: An Algorithm for Unordered Fuzzy Rule Induction. Data Mining and Knowledge Discovery..<br/>
59	* <br/>
60	* <p/>
61	<!-- globalinfo-end -->
62	*
63	<!-- technical-bibtex-start -->
64	* BibTeX:
65	* <pre>
66	* @article{Huehn2009,
67	* author = {Jens Christian Huehn and Eyke Huellermeier},
68	* journal = {Data Mining and Knowledge Discovery},
69	* title = {FURIA: An Algorithm for Unordered Fuzzy Rule Induction},
70	* year = {2009}
71	* }
72	* </pre>
73	* <p/>
74	<!-- technical-bibtex-end -->
75	*
76	<!-- options-start -->
77	* Valid options are: <p/>
78	*
79	* <pre> -F <number of folds>
80	* Set number of folds for REP
81	* One fold is used as pruning set.
82	* (default 3)</pre>
83	*
84	* <pre> -N <min. weights>
85	* Set the minimal weights of instances
86	* within a split.
87	* (default 2.0)</pre>
88	*
89	* <pre> -O <number of runs>
90	* Set the number of runs of
91	* optimizations. (Default: 2)</pre>
92	*
93	* <pre> -D
94	* Set whether turn on the
95	* debug mode (Default: false)</pre>
96	*
97	* <pre> -S <seed>
98	* The seed of randomization
99	* (Default: 1)</pre>
100	*
101	* <pre> -E
102	* Whether NOT check the error rate>=0.5
103	* in stopping criteria (default: check)</pre>
104	*
105	* <pre> -s
106	* The action performed for uncovered instances.
107	* (default: use stretching)</pre>
108	*
109	* <pre> -p
110	* The T-norm used as fuzzy AND-operator.
111	* (default: Product T-norm)</pre>
112	*
113	<!-- options-end -->
114	*
115	* @author Jens Christian Hühn (huehn@gmx.net)
116	* @author Xin Xu (xx5@cs.waikato.ac.nz)
117	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
118	* @version $Revision: 5964 $
119	*/
120	public class FURIA
121	extends AbstractClassifier
122	implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler,
123	TechnicalInformationHandler {
124
125	/** for serialization */
126	static final long serialVersionUID = -6589312996832147161L;
127
128	/** The limit of description length surplus in ruleset generation */
129	private static double MAX_DL_SURPLUS = 64.0;
130
131	/** The class attribute of the data*/
132	private Attribute m_Class;
133
134	/** The ruleset */
135	private FastVector m_Ruleset;
136
137	/** The predicted class distribution */
138	private FastVector m_Distributions;
139
140	/** Runs of optimizations */
141	private int m_Optimizations = 2;
142
143	/** Random object used in this class */
144	private Random m_Random = null;
145
146	/** # of all the possible conditions in a rule */
147	private double m_Total = 0;
148
149	/** The seed to perform randomization */
150	private long m_Seed = 1;
151
152	/** The number of folds to split data into Grow and Prune for IREP */
153	private int m_Folds = 3;
154
155	/** The minimal number of instance weights within a split*/
156	private double m_MinNo = 2.0;
157
158	/** Whether in a debug mode */
159	private boolean m_Debug = false;
160
161	/** Whether check the error rate >= 0.5 in stopping criteria */
162	private boolean m_CheckErr = true;
163
164	/** The class distribution of the training data*/
165	private double[] aprioriDistribution;
166
167	/** The RuleStats for the ruleset of each class value */
168	private FastVector m_RulesetStats;
169
170
171	/** What to do if instance is uncovered */
172	private int m_uncovAction = UNCOVACTION_STRETCH;
173
174	/** An uncovered instance is covered using rule stretching. */
175	private static final int UNCOVACTION_STRETCH = 0;
176
177	/** An uncovered instance is classified according to the training data class distribution. */
178	private static final int UNCOVACTION_APRIORI = 1;
179
180	/** An uncovered instance is not classified at all. */
181	private static final int UNCOVACTION_REJECT = 2;
182
183	/** The tags explaining the uncovered action. */
184	private static final Tag [] TAGS_UNCOVACTION = {
185	new Tag(UNCOVACTION_STRETCH, "Apply rule stretching (standard)"),
186	new Tag(UNCOVACTION_APRIORI, "Vote for the most frequent class"),
187	new Tag(UNCOVACTION_REJECT, "Reject the decision and abstain")
188	};
189
190
191	/** Whether using product T-norm (or else min T-norm) */
192	private int m_tNorm = TNORM_PROD;
193
194	/** The Product T-Norm flag. */
195	private static final int TNORM_PROD = 0;
196
197	/** The Minimum T-Norm flag. */
198	private static final int TNORM_MIN = 1;
199
200	/** The tags describing the T-norms */
201	private static final Tag [] TAGS_TNORM = {
202	new Tag(TNORM_PROD, "Product T-Norm (standard)"),
203	new Tag(TNORM_MIN, "Minimum T-Norm")
204	};
205
206
207	/**
208	* Returns a string describing classifier
209	* @return a description suitable for
210	* displaying in the explorer/experimenter gui
211	*/
212	public String globalInfo() {
213
214	return "FURIA: Fuzzy Unordered Rule Induction Algorithm\n\n"
215	+ "Details please see:\n\n"
216	+ getTechnicalInformation().toString() + "\n\n";
217	}
218
219	/**
220	* Returns an instance of a TechnicalInformation object, containing
221	* detailed information about the technical background of this class,
222	* e.g., paper reference or book this class is based on.
223	*
224	* @return the technical information about this class
225	*/
226	public TechnicalInformation getTechnicalInformation() {
227	TechnicalInformation result;
228
229	result = new TechnicalInformation(Type.ARTICLE);
230	result.setValue(Field.AUTHOR, "Jens Christian Huehn and Eyke Huellermeier");
231	result.setValue(Field.TITLE, "FURIA: An Algorithm for Unordered Fuzzy Rule Induction");
232	result.setValue(Field.YEAR, "2009");
233	result.setValue(Field.JOURNAL, "Data Mining and Knowledge Discovery");
234	return result;
235	}
236
237	/**
238	* Returns an enumeration describing the available options
239	* Valid options are: <p>
240	*
241	* -F number <br>
242	* The number of folds for reduced error pruning. One fold is
243	* used as the pruning set. (Default: 3) <p>
244	*
245	* -N number <br>
246	* The minimal weights of instances within a split.
247	* (Default: 2) <p>
248	*
249	* -O number <br>
250	* Set the number of runs of optimizations. (Default: 2)<p>
251	*
252	* -D <br>
253	* Whether turn on the debug mode
254	*
255	* -S number <br>
256	* The seed of randomization used in FURIA.(Default: 1)<p>
257	*
258	* -E <br>
259	* Whether NOT check the error rate >= 0.5 in stopping criteria.
260	* (default: check)<p>
261	*
262	* -s <br>
263	* The action performed for uncovered instances.
264	* (default: use rule stretching)<p>
265	*
266	* -p <br>
267	* The T-Norm used as fuzzy AND-operator.
268	* (default: Product T-Norm)<p>
269	*
270	* @return an enumeration of all the available options
271	*/
272	public Enumeration listOptions() {
273	Vector newVector = new Vector(8);
274	newVector.addElement(new Option("\tSet number of folds for REP\n" +
275	"\tOne fold is used as pruning set.\n" +
276	"\t(default 3)","F", 1, "-F <number of folds>"));
277
278	newVector.addElement(new Option("\tSet the minimal weights of instances\n" +
279	"\twithin a split.\n" +
280	"\t(default 2.0)","N", 1, "-N <min. weights>"));
281
282	newVector.addElement(new Option("\tSet the number of runs of\n"+
283	"\toptimizations. (Default: 2)", "O",
284	1,"-O <number of runs>"));
285
286	newVector.addElement(new Option("\tSet whether turn on the\n"+
287	"\tdebug mode (Default: false)", "D",
288	0,"-D"));
289
290	newVector.addElement(new Option("\tThe seed of randomization\n"+
291	"\t(Default: 1)", "S",
292	1,"-S <seed>"));
293
294	newVector.addElement(new Option("\tWhether NOT check the error rate>=0.5\n"
295	+"\tin stopping criteria "
296	+"\t(default: check)", "E",
297	0, "-E"));
298
299	newVector.addElement(new Option("\tThe action performed for uncovered instances.\n"
300	+"\t(default: use stretching)", "s",
301	1, "-s"));
302
303	newVector.addElement(new Option("\tThe T-norm used as fuzzy AND-operator.\n"
304	+"\t(default: Product T-norm)", "p",
305	1, "-p"));
306
307	return newVector.elements();
308	}
309
310	/**
311	* Parses a given list of options. <p/>
312	*
313	<!-- options-start -->
314	* Valid options are: <p/>
315	*
316	* <pre> -F <number of folds>
317	* Set number of folds for REP
318	* One fold is used as pruning set.
319	* (default 3)</pre>
320	*
321	* <pre> -N <min. weights>
322	* Set the minimal weights of instances
323	* within a split.
324	* (default 2.0)</pre>
325	*
326	* <pre> -O <number of runs>
327	* Set the number of runs of
328	* optimizations. (Default: 2)</pre>
329	*
330	* <pre> -D
331	* Set whether turn on the
332	* debug mode (Default: false)</pre>
333	*
334	* <pre> -S <seed>
335	* The seed of randomization
336	* (Default: 1)</pre>
337	*
338	* <pre> -E
339	* Whether NOT check the error rate>=0.5
340	* in stopping criteria (default: check)</pre>
341	*
342	* <pre> -s
343	* The action performed for uncovered instances.
344	* (default: use stretching)</pre>
345	*
346	* <pre> -p
347	* The T-norm used as fuzzy AND-operator.
348	* (default: Product T-norm)</pre>
349	*
350	<!-- options-end -->
351	*
352	* @param options the list of options as an array of strings
353	* @throws Exception if an option is not supported
354	*/
355	public void setOptions(String[] options) throws Exception {
356	String numFoldsString = Utils.getOption('F', options);
357	if (numFoldsString.length() != 0)
358	m_Folds = Integer.parseInt(numFoldsString);
359	else
360	m_Folds = 3;
361
362	String minNoString = Utils.getOption('N', options);
363	if (minNoString.length() != 0)
364	m_MinNo = Double.parseDouble(minNoString);
365	else
366	m_MinNo = 2.0;
367
368	String seedString = Utils.getOption('S', options);
369	if (seedString.length() != 0)
370	m_Seed = Long.parseLong(seedString);
371	else
372	m_Seed = 1;
373
374	String runString = Utils.getOption('O', options);
375	if (runString.length() != 0)
376	m_Optimizations = Integer.parseInt(runString);
377	else
378	m_Optimizations = 2;
379
380	String tNormString = Utils.getOption('p', options);
381	if (tNormString.length() != 0)
382	m_tNorm = Integer.parseInt(tNormString);
383	else
384	m_tNorm = TNORM_PROD;
385
386	String uncovActionString = Utils.getOption('s', options);
387	if (uncovActionString.length() != 0)
388	m_uncovAction = Integer.parseInt(uncovActionString);
389	else
390	m_uncovAction = UNCOVACTION_STRETCH;
391
392	m_Debug = Utils.getFlag('D', options);
393
394	m_CheckErr = !Utils.getFlag('E', options);
395	}
396
397	/**
398	* Gets the current settings of the Classifier.
399	*
400	* @return an array of strings suitable for passing to setOptions
401	*/
402	public String [] getOptions() {
403
404	String [] options = new String [14];
405	int current = 0;
406	options[current++] = "-F"; options[current++] = "" + m_Folds;
407	options[current++] = "-N"; options[current++] = "" + m_MinNo;
408	options[current++] = "-O"; options[current++] = "" + m_Optimizations;
409	options[current++] = "-S"; options[current++] = "" + m_Seed;
410	options[current++] = "-p"; options[current++] = "" + m_tNorm;
411	options[current++] = "-s"; options[current++] = "" + m_uncovAction;
412
413	if(m_Debug)
414	options[current++] = "-D";
415
416	if(!m_CheckErr)
417	options[current++] = "-E";
418
419	while(current < options.length)
420	options[current++] = "";
421
422	return options;
423	}
424
425	/**
426	* Returns an enumeration of the additional measure names
427	* @return an enumeration of the measure names
428	*/
429	public Enumeration enumerateMeasures() {
430	Vector newVector = new Vector(1);
431	newVector.addElement("measureNumRules");
432	return newVector.elements();
433	}
434
435	/**
436	* Returns the value of the named measure
437	* @param additionalMeasureName the name of the measure to query for its value
438	* @return the value of the named measure
439	* @throws IllegalArgumentException if the named measure is not supported
440	*/
441	public double getMeasure(String additionalMeasureName) {
442	if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0)
443	return m_Ruleset.size();
444	else
445	throw new IllegalArgumentException(additionalMeasureName+" not supported (FURIA)");
446	}
447
448	/**
449	* Returns the tip text for this property
450	* @return tip text for this property suitable for
451	* displaying in the explorer/experimenter gui
452	*/
453	public String foldsTipText() {
454	return "Determines the amount of data used for pruning. One fold is used for "
455	+ "pruning, the rest for growing the rules.";
456	}
457
458	/**
459	* Sets the number of folds to use
460	*
461	* @param fold the number of folds
462	*/
463	public void setFolds(int fold) {
464	m_Folds = fold;
465	}
466
467	/**
468	* Gets the number of folds
469	*
470	* @return the number of folds
471	*/
472	public int getFolds(){
473	return m_Folds;
474	}
475
476	/**
477	* Returns the tip text for this property
478	* @return tip text for this property suitable for
479	* displaying in the explorer/experimenter gui
480	*/
481	public String minNoTipText() {
482	return "The minimum total weight of the instances in a rule.";
483	}
484
485	/**
486	* Sets the minimum total weight of the instances in a rule
487	*
488	* @param m the minimum total weight of the instances in a rule
489	*/
490	public void setMinNo(double m) {
491	m_MinNo = m;
492	}
493
494	/**
495	* Gets the minimum total weight of the instances in a rule
496	*
497	* @return the minimum total weight of the instances in a rule
498	*/
499	public double getMinNo(){
500	return m_MinNo;
501	}
502
503	/**
504	* Returns the tip text for this property
505	* @return tip text for this property suitable for
506	* displaying in the explorer/experimenter gui
507	*/
508	public String seedTipText() {
509	return "The seed used for randomizing the data.";
510	}
511
512	/**
513	* Sets the seed value to use in randomizing the data
514	*
515	* @param s the new seed value
516	*/
517	public void setSeed(long s) {
518	m_Seed = s;
519	}
520
521	/**
522	* Gets the current seed value to use in randomizing the data
523	*
524	* @return the seed value
525	*/
526	public long getSeed(){
527	return m_Seed;
528	}
529
530	/**
531	* Returns the tip text for this property
532	* @return tip text for this property suitable for
533	* displaying in the explorer/experimenter gui
534	*/
535	public String optimizationsTipText() {
536	return "The number of optimization runs.";
537	}
538
539	/**
540	* Sets the number of optimization runs
541	*
542	* @param run the number of optimization runs
543	*/
544	public void setOptimizations(int run) {
545	m_Optimizations = run;
546	}
547
548	/**
549	* Gets the the number of optimization runs
550	*
551	* @return the number of optimization runs
552	*/
553	public int getOptimizations() {
554	return m_Optimizations;
555	}
556
557	/**
558	* Returns the tip text for this property
559	* @return tip text for this property suitable for
560	* displaying in the explorer/experimenter gui
561	*/
562	public String debugTipText() {
563	return "Whether debug information is output to the console.";
564	}
565
566	/**
567	* Sets whether debug information is output to the console
568	*
569	* @param d whether debug information is output to the console
570	*/
571	public void setDebug(boolean d) {
572	m_Debug = d;
573	}
574
575	/**
576	* Gets whether debug information is output to the console
577	*
578	* @return whether debug information is output to the console
579	*/
580	public boolean getDebug(){
581	return m_Debug;
582	}
583
584	/**
585	* Returns the tip text for this property
586	* @return tip text for this property suitable for
587	* displaying in the explorer/experimenter gui
588	*/
589	public String checkErrorRateTipText() {
590	return "Whether check for error rate >= 1/2 is included" +
591	" in stopping criterion.";
592	}
593
594	/**
595	* Sets whether to check for error rate is in stopping criterion
596	*
597	* @param d whether to check for error rate is in stopping criterion
598	*/
599	public void setCheckErrorRate(boolean d) {
600	m_CheckErr = d;
601	}
602
603	/**
604	* Gets whether to check for error rate is in stopping criterion
605	*
606	* @return true if checking for error rate is in stopping criterion
607	*/
608	public boolean getCheckErrorRate(){
609	return m_CheckErr;
610	}
611
612	/**
613	* Returns the tip text for this property
614	* @return tip text for this property suitable for
615	* displaying in the explorer/experimenter gui
616	*/
617	public String uncovActionTipText() {
618	return "Selet the action that is performed for uncovered instances.";
619	}
620
621	/**
622	* Gets the action that is performed for uncovered instances.
623	* It can be UNCOVACTION_STRETCH, UNCOVACTION_APRIORI or
624	* UNCOVACTION_REJECT.
625	* @return the current TNorm.
626	*/
627	public SelectedTag getUncovAction() {
628	return new SelectedTag(m_uncovAction, TAGS_UNCOVACTION);
629	}
630
631	/**
632	* Sets the action that is performed for uncovered instances.
633	* It can be UNCOVACTION_STRETCH, UNCOVACTION_APRIORI or
634	* UNCOVACTION_REJECT.
635	* @param newUncovAction the new action.
636	*/
637	public void setUncovAction(SelectedTag newUncovAction) {
638	if (newUncovAction.getTags() == TAGS_UNCOVACTION) {
639	m_uncovAction = newUncovAction.getSelectedTag().getID();
640	}
641	}
642
643	/**
644	* Returns the tip text for this property
645	* @return tip text for this property suitable for
646	* displaying in the explorer/experimenter gui
647	*/
648	public String TNormTipText() {
649	return "Choose the T-Norm that is used as fuzzy AND-operator.";
650	}
651
652	/**
653	* Gets the TNorm used. Will be either TNORM_PROD or TNORM_MIN.
654	*
655	* @return the current TNorm.
656	*/
657	public SelectedTag getTNorm() {
658	return new SelectedTag(m_tNorm, TAGS_TNORM);
659	}
660
661	/**
662	* Sets the TNorm used. Will be either TNORM_PROD or TNORM_MIN.
663	*
664	* @param newTNorm the new TNorm.
665	*/
666	public void setTNorm(SelectedTag newTNorm) {
667	if (newTNorm.getTags() == TAGS_TNORM) {
668	m_tNorm = newTNorm.getSelectedTag().getID();
669	}
670	}
671
672
673	/**
674	* Get the ruleset generated by FURIA
675	*
676	* @return the ruleset
677	*/
678	public FastVector getRuleset(){ return m_Ruleset; }
679
680	/**
681	* Get the statistics of the ruleset in the given position
682	*
683	* @param pos the position of the stats, assuming correct
684	* @return the statistics of the ruleset in the given position
685	*/
686	public RuleStats getRuleStats(int pos) {
687	return (RuleStats)m_RulesetStats.elementAt(pos);
688	}
689
690	/**
691	* The single antecedent in the rule, which is composed of an attribute and
692	* the corresponding value. There are two inherited classes, namely NumericAntd
693	* and NominalAntd in which the attributes are numeric and nominal respectively.
694	*/
695	protected abstract class Antd
696	implements WeightedInstancesHandler, Copyable, Serializable {
697
698	/** The attribute of the antecedent */
699	public Attribute att;
700
701	/** The attribute value of the antecedent.
702	For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */
703	public double value;
704
705	/** The maximum infoGain achieved by this antecedent test
706	* in the growing data */
707	protected double maxInfoGain;
708
709	/** The accurate rate of this antecedent test on the growing data */
710	protected double accuRate;
711
712	/** The coverage of this antecedent in the growing data */
713	protected double cover;
714
715	/** The accurate data for this antecedent in the growing data */
716	protected double accu;
717
718
719	/** Confidence / weight of this rule for the rule stretching procedure that
720	* is returned when this is the last antecedent of the rule. */
721	double weightOfTheRuleWhenItIsPrunedAfterThisAntecedent = 0;
722
723	/** Confidence / weight of this antecedent. */
724	public double m_confidence = 0.0;
725
726	/**
727	* Constructor
728	*/
729	public Antd(Attribute a){
730	att=a;
731	value=Double.NaN;
732	maxInfoGain = 0;
733	accuRate = Double.NaN;
734	cover = Double.NaN;
735	accu = Double.NaN;
736	}
737
738	/* The abstract members for inheritance */
739	public abstract Instances[] splitData(Instances data, double defAcRt,
740	double cla);
741	public abstract double covers(Instance inst);
742	public abstract String toString();
743
744	/**
745	* Implements Copyable
746	*
747	* @return a copy of this object
748	*/
749	public abstract Object copy();
750
751	/* Get functions of this antecedent */
752	public Attribute getAttr(){ return att; }
753	public double getAttrValue(){ return value; }
754	public double getMaxInfoGain(){ return maxInfoGain; }
755	public double getAccuRate(){ return accuRate; }
756	public double getAccu(){ return accu; }
757	public double getCover(){ return cover; }
758	}
759
760	/**
761	* The antecedent with numeric attribute
762	*/
763	public class
764	NumericAntd extends Antd {
765
766	/** for serialization */
767	static final long serialVersionUID = 5699457269983735442L;
768
769	/** The split point for this numeric antecedent */
770	public double splitPoint;
771
772	/** The edge point for the fuzzy set of this numeric antecedent */
773	public double supportBound;
774
775	/** A flag determining whether this antecedent was successfully fuzzified yet*/
776	public boolean fuzzyYet = false;
777
778
779	/**
780	* Constructor
781	*/
782	public NumericAntd(Attribute a){
783	super(a);
784	splitPoint = Double.NaN;
785	supportBound = Double.NaN;
786	}
787
788	/**
789	* Get split point of this numeric antecedent
790	*
791	* @return the split point of this numeric antecedent
792	*/
793	public double getSplitPoint(){
794	return splitPoint;
795	}
796
797	/**
798	* Implements Copyable
799	*
800	* @return a copy of this object
801	*/
802	public Object copy(){
803	NumericAntd na = new NumericAntd(getAttr());
804	na.m_confidence = m_confidence;
805	na.value = this.value;
806	na.splitPoint = this.splitPoint;
807	na.supportBound = this.supportBound;
808	na.fuzzyYet = this.fuzzyYet;
809	return na;
810	}
811
812
813
814	/**
815	* Implements the splitData function.
816	* This procedure is to split the data into two bags according
817	* to the information gain of the numeric attribute value
818	* The maximum infoGain is also calculated.
819	*
820	* @param insts the data to be split
821	* @param defAcRt the default accuracy rate for data
822	* @param cl the class label to be predicted
823	* @return the array of data after split
824	*/
825	public Instances[] splitData(Instances insts, double defAcRt,
826	double cl){
827	Instances data = insts;
828	int total=data.numInstances();// Total number of instances without
829	// missing value for att
830
831	int split=1; // Current split position
832	int prev=0; // Previous split position
833	int finalSplit=split; // Final split position
834	maxInfoGain = 0;
835	value = 0;
836
837	double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
838
839	data.sort(att);
840	// Find the las instance without missing value
841	for(int x=0; x<data.numInstances(); x++){
842	Instance inst = data.instance(x);
843	if(inst.isMissing(att)){
844	total = x;
845	break;
846	}
847
848	sndCover += inst.weight();
849	if(Utils.eq(inst.classValue(), cl))
850	sndAccu += inst.weight();
851	}
852
853	if(total == 0) return null; // Data all missing for the attribute
854	splitPoint = data.instance(total-1).value(att);
855
856	for(; split <= total; split++){
857	if((split == total) \|\|
858	(data.instance(split).value(att) > // Can't split within
859	data.instance(prev).value(att))){ // same value
860
861	for(int y=prev; y<split; y++){
862	Instance inst = data.instance(y);
863	fstCover += inst.weight();
864	if(Utils.eq(data.instance(y).classValue(), cl)){
865	fstAccu += inst.weight(); // First bag positive# ++
866	}
867	}
868
869	double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
870	sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);
871
872	/* Which bag has higher information gain? */
873	boolean isFirst;
874	double fstInfoGain, sndInfoGain;
875	double accRate, infoGain, coverage, accurate;
876
877	fstInfoGain =
878	//Utils.eq(defAcRt, 1.0) ?
879	//fstAccu/(double)numConds :
880	fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));
881
882	sndInfoGain =
883	//Utils.eq(defAcRt, 1.0) ?
884	//sndAccu/(double)numConds :
885	sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));
886
887	if(fstInfoGain > sndInfoGain){
888	isFirst = true;
889	infoGain = fstInfoGain;
890	accRate = fstAccuRate;
891	accurate = fstAccu;
892	coverage = fstCover;
893	}
894	else{
895	isFirst = false;
896	infoGain = sndInfoGain;
897	accRate = sndAccuRate;
898	accurate = sndAccu;
899	coverage = sndCover;
900	}
901
902	/* Check whether so far the max infoGain */
903	if(infoGain > maxInfoGain){
904	splitPoint = data.instance(prev).value(att);
905	value = (isFirst) ? 0 : 1;
906	accuRate = accRate;
907	accu = accurate;
908	cover = coverage;
909	maxInfoGain = infoGain;
910	finalSplit = (isFirst) ? split : prev;
911	}
912
913	for(int y=prev; y<split; y++){
914	Instance inst = data.instance(y);
915	sndCover -= inst.weight();
916	if(Utils.eq(data.instance(y).classValue(), cl)){
917	sndAccu -= inst.weight(); // Second bag positive# --
918	}
919	}
920	prev=split;
921	}
922	}
923
924	/* Split the data */
925	Instances[] splitData = new Instances[2];
926	splitData[0] = new Instances(data, 0, finalSplit);
927	splitData[1] = new Instances(data, finalSplit, total-finalSplit);
928
929	return splitData;
930	}
931
932
933	/**
934	* The degree of coverage for the instance given that antecedent
935	*
936	* @param inst the instance in question
937	* @return the numeric value indicating the membership of the instance
938	* for this antecedent
939	*/
940	public double covers(Instance inst){
941	double isCover=0;
942	if(!inst.isMissing(att)){
943	if((int)value == 0){ // First bag
944	if(inst.value(att) <= splitPoint)
945	isCover=1;
946	else if(fuzzyYet && (inst.value(att) > splitPoint) && (inst.value(att) < supportBound ))
947	isCover= 1-((inst.value(att) - splitPoint)/(supportBound-splitPoint));
948	}else{
949	if(inst.value(att) >= splitPoint) // Second bag
950	isCover=1;
951	else if(fuzzyYet && inst.value(att) < splitPoint && (inst.value(att) > supportBound ))
952	isCover= 1-((splitPoint - inst.value(att)) /(splitPoint-supportBound));
953	}
954	}
955
956	return isCover;
957	}
958
959	/**
960	* Prints this antecedent
961	*
962	* @return a textual description of this antecedent
963	*/
964	public String toString() {
965	if (value == 0){
966	if (fuzzyYet){
967	return (att.name() + " in [-inf, -inf, " + Utils.doubleToString(splitPoint, 6) + ", " + Utils.doubleToString(supportBound, 6) + "]");
968	}
969	return (att.name() + " in [-inf, " + Utils.doubleToString(splitPoint, 6) + "]");
970	}else{
971	if (fuzzyYet){
972	return (att.name() + " in [" + Utils.doubleToString(supportBound, 6) + ", " + Utils.doubleToString(splitPoint, 6) + ", inf, inf]");
973	}
974	return (att.name() + " in [" + Utils.doubleToString(splitPoint, 6) + ", inf]");
975	}
976
977	}
978
979	}
980
981
982	/**
983	* The antecedent with nominal attribute
984	*/
985	protected class NominalAntd
986	extends Antd{
987
988	/** for serialization */
989	static final long serialVersionUID = -9102297038837585135L;
990
991	/* The parameters of infoGain calculated for each attribute value
992	* in the growing data */
993	private double[] accurate;
994	private double[] coverage;
995
996	/**
997	* Constructor
998	*/
999	public NominalAntd(Attribute a){
1000	super(a);
1001	int bag = att.numValues();
1002	accurate = new double[bag];
1003	coverage = new double[bag];
1004	}
1005
1006	/**
1007	* Implements Copyable
1008	*
1009	* @return a copy of this object
1010	*/
1011	public Object copy(){
1012	Antd antec = new NominalAntd(getAttr());
1013	antec.m_confidence = m_confidence;
1014	antec.value = this.value;
1015	return antec;
1016	}
1017
1018	/**
1019	* Implements the splitData function.
1020	* This procedure is to split the data into bags according
1021	* to the nominal attribute value
1022	* The infoGain for each bag is also calculated.
1023	*
1024	* @param data the data to be split
1025	* @param defAcRt the default accuracy rate for data
1026	* @param cl the class label to be predicted
1027	* @return the array of data after split
1028	*/
1029	public Instances[] splitData(Instances data, double defAcRt,
1030	double cl){
1031	int bag = att.numValues();
1032	Instances[] splitData = new Instances[bag];
1033
1034	for(int x=0; x<bag; x++){
1035	splitData[x] = new Instances(data, data.numInstances());
1036	accurate[x] = 0;
1037	coverage[x] = 0;
1038	}
1039
1040	for(int x=0; x<data.numInstances(); x++){
1041	Instance inst=data.instance(x);
1042	if(!inst.isMissing(att)){
1043	int v = (int)inst.value(att);
1044	splitData[v].add(inst);
1045	coverage[v] += inst.weight();
1046	if((int)inst.classValue() == (int)cl)
1047	accurate[v] += inst.weight();
1048	}
1049	}
1050
1051	for(int x=0; x<bag; x++){
1052	double t = coverage[x]+1.0;
1053	double p = accurate[x] + 1.0;
1054	double infoGain =
1055	//Utils.eq(defAcRt, 1.0) ?
1056	//accurate[x]/(double)numConds :
1057	accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));
1058
1059	if(infoGain > maxInfoGain){
1060	maxInfoGain = infoGain;
1061	cover = coverage[x];
1062	accu = accurate[x];
1063	accuRate = p/t;
1064	value = (double)x;
1065	}
1066	}
1067
1068	return splitData;
1069	}
1070
1071	/**
1072	* Whether the instance is covered by this antecedent
1073	*
1074	* @param inst the instance in question
1075	* @return the boolean value indicating whether the instance is
1076	* covered by this antecedent
1077	*/
1078	public double covers(Instance inst){
1079	double isCover=0;
1080	if(!inst.isMissing(att)){
1081	if((int)inst.value(att) == (int)value)
1082	isCover=1;
1083	}
1084	return isCover;
1085	}
1086
1087	/**
1088	* Prints this antecedent
1089	*
1090	* @return a textual description of this antecedent
1091	*/
1092	public String toString() {
1093	return (att.name() + " = " +att.value((int)value));
1094	}
1095	}
1096
1097
1098	/**
1099	* This class implements a single rule that predicts specified class.
1100	*
1101	* A rule consists of antecedents "AND"ed together and the consequent
1102	* (class value) for the classification.
1103	* In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
1104	* select an antecedent and Reduced Error Prunning (REP) with the metric
1105	* of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.
1106	*/
1107	public class RipperRule
1108	extends Rule{
1109
1110	/** for serialization */
1111	static final long serialVersionUID = -2410020717305262952L;
1112
1113	/** The internal representation of the class label to be predicted */
1114	double m_Consequent = -1;
1115
1116	/** The vector of antecedents of this rule*/
1117	public FastVector m_Antds = null;
1118
1119	/** Constructor */
1120	public RipperRule(){
1121	m_Antds = new FastVector();
1122	}
1123
1124	/**
1125	* Sets the internal representation of the class label to be predicted
1126	*
1127	* @param cl the internal representation of the class label to be predicted
1128	*/
1129	public void setConsequent(double cl) {
1130	m_Consequent = cl;
1131	}
1132
1133	/**
1134	* Gets the internal representation of the class label to be predicted
1135	*
1136	* @return the internal representation of the class label to be predicted
1137	*/
1138	public double getConsequent() {
1139	return m_Consequent;
1140	}
1141
1142	/**
1143	* Get a shallow copy of this rule
1144	*
1145	* @return the copy
1146	*/
1147	public Object copy(){
1148	RipperRule copy = new RipperRule();
1149	copy.setConsequent(getConsequent());
1150	copy.m_Antds = (FastVector)this.m_Antds.copyElements();
1151	return copy;
1152	}
1153
1154
1155
1156	/**
1157	* The degree of coverage instance covered by this rule
1158	*
1159	* @param datum the instance in question
1160	* @return the degree to which the instance
1161	* is covered by this rule
1162	*/
1163	public double coverageDegree(Instance datum){
1164	double coverage = 1;
1165
1166	for(int i=0; i<m_Antds.size(); i++){
1167	Antd antd = (Antd)m_Antds.elementAt(i);
1168	if(m_tNorm == TNORM_PROD){
1169	// Product T-Norm
1170	if (antd instanceof NumericAntd)
1171	coverage *= ((NumericAntd)antd).covers(datum);
1172	else
1173	coverage *= antd.covers(datum);
1174	}else{
1175	// Min T-Norm
1176	if (antd instanceof NumericAntd)
1177	coverage = Math.min(coverage, ((NumericAntd)antd).covers(datum));
1178	else
1179	coverage = Math.min(coverage, antd.covers(datum));
1180	}
1181
1182	}
1183
1184	return coverage;
1185	}
1186
1187	/**
1188	* Whether the instance covered by this rule
1189	*
1190	* @param datum the instance in question
1191	* @return the boolean value indicating whether the instance
1192	* is covered by this rule
1193	*/
1194	public boolean covers(Instance datum){
1195	if (coverageDegree(datum) == 0){
1196	return false;
1197	}else{
1198	return true;
1199	}
1200	}
1201
1202	/**
1203	* Whether this rule has antecedents, i.e. whether it is a default rule
1204	*
1205	* @return the boolean value indicating whether the rule has antecedents
1206	*/
1207	public boolean hasAntds(){
1208	if (m_Antds == null)
1209	return false;
1210	else
1211	return (m_Antds.size() > 0);
1212	}
1213
1214	/**
1215	* the number of antecedents of the rule
1216	*
1217	* @return the size of this rule
1218	*/
1219	public double size(){ return (double)m_Antds.size(); }
1220
1221
1222	/**
1223	* Private function to compute default number of accurate instances
1224	* in the specified data for the consequent of the rule
1225	*
1226	* @param data the data in question
1227	* @return the default accuracy number
1228	*/
1229	private double computeDefAccu(Instances data){
1230	double defAccu=0;
1231	for(int i=0; i<data.numInstances(); i++){
1232	Instance inst = data.instance(i);
1233	if((int)inst.classValue() == (int)m_Consequent)
1234	defAccu += inst.weight();
1235	}
1236	return defAccu;
1237	}
1238
1239
1240	/**
1241	* Build one rule using the growing data
1242	*
1243	* @param data the growing data used to build the rule
1244	* @throws Exception if the consequent is not set yet
1245	*/
1246	public void grow(Instances data) throws Exception {
1247	if(m_Consequent == -1)
1248	throw new Exception(" Consequent not set yet.");
1249
1250	Instances growData = data;
1251	double sumOfWeights = growData.sumOfWeights();
1252	if(!Utils.gr(sumOfWeights, 0.0))
1253	return;
1254
1255	/* Compute the default accurate rate of the growing data */
1256	double defAccu = computeDefAccu(growData);
1257	double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0);
1258
1259	/* Keep the record of which attributes have already been used*/
1260	boolean[] used=new boolean [growData.numAttributes()];
1261	for (int k=0; k<used.length; k++)
1262	used[k]=false;
1263	int numUnused=used.length;
1264
1265	// If there are already antecedents existing
1266	for(int j=0; j < m_Antds.size(); j++){
1267	Antd antdj = (Antd)m_Antds.elementAt(j);
1268	if(!antdj.getAttr().isNumeric()){
1269	used[antdj.getAttr().index()]=true;
1270	numUnused--;
1271	}
1272	}
1273
1274	double maxInfoGain;
1275	while (Utils.gr(growData.numInstances(), 0.0) &&
1276	(numUnused > 0)
1277	&& Utils.sm(defAcRt, 1.0)
1278	){
1279
1280	// We require that infoGain be positive
1281	/*if(numAntds == originalSize)
1282	maxInfoGain = 0.0; // At least one condition allowed
1283	else
1284	maxInfoGain = Utils.eq(defAcRt, 1.0) ?
1285	defAccu/(double)numAntds : 0.0; */
1286	maxInfoGain = 0.0;
1287
1288	/* Build a list of antecedents */
1289	Antd oneAntd=null;
1290	Instances coverData = null;
1291	Enumeration enumAttr=growData.enumerateAttributes();
1292
1293	/* Build one condition based on all attributes not used yet*/
1294	while (enumAttr.hasMoreElements()){
1295	Attribute att= (Attribute)(enumAttr.nextElement());
1296
1297	if(m_Debug)
1298	System.err.println("\nOne condition: size = "
1299	+ growData.sumOfWeights());
1300
1301	Antd antd =null;
1302	if(att.isNumeric())
1303	antd = new NumericAntd(att);
1304	else
1305	antd = new NominalAntd(att);
1306
1307	if(!used[att.index()]){
1308	/* Compute the best information gain for each attribute,
1309	it's stored in the antecedent formed by this attribute.
1310	This procedure returns the data covered by the antecedent*/
1311	Instances coveredData = computeInfoGain(growData, defAcRt,
1312	antd);
1313	if(coveredData != null){
1314	double infoGain = antd.getMaxInfoGain();
1315	if(m_Debug)
1316	System.err.println("Test of \'"+antd.toString()+
1317	"\': infoGain = "+
1318	infoGain + " \| Accuracy = " +
1319	antd.getAccuRate()+
1320	"="+antd.getAccu()
1321	+"/"+antd.getCover()+
1322	" def. accuracy: "+defAcRt);
1323
1324	if(infoGain > maxInfoGain){
1325	oneAntd=antd;
1326	coverData = coveredData;
1327	maxInfoGain = infoGain;
1328	}
1329	}
1330	}
1331	}
1332
1333	if(oneAntd == null) break; // Cannot find antds
1334	if(Utils.sm(oneAntd.getAccu(), m_MinNo)) break;// Too low coverage
1335
1336	//Numeric attributes can be used more than once
1337	if(!oneAntd.getAttr().isNumeric()){
1338	used[oneAntd.getAttr().index()]=true;
1339	numUnused--;
1340	}
1341
1342	m_Antds.addElement(oneAntd);
1343
1344
1345	growData = coverData;// Grow data size is shrinking
1346	defAcRt = oneAntd.getAccuRate();
1347	}
1348	}
1349
1350
1351	/**
1352	* Compute the best information gain for the specified antecedent
1353	*
1354	* @param instances the data based on which the infoGain is computed
1355	* @param defAcRt the default accuracy rate of data
1356	* @param antd the specific antecedent
1357	* @return the data covered by the antecedent
1358	*/
1359	private Instances computeInfoGain(Instances instances, double defAcRt,
1360	Antd antd){
1361	Instances data = instances;
1362
1363	/* Split the data into bags.
1364	The information gain of each bag is also calculated in this procedure */
1365	Instances[] splitData = antd.splitData(data, defAcRt,
1366	m_Consequent);
1367
1368	/* Get the bag of data to be used for next antecedents */
1369	if(splitData != null)
1370	return splitData[(int)antd.getAttrValue()];
1371	else return null;
1372	}
1373
1374	/**
1375	* Prune all the possible final sequences of the rule using the
1376	* pruning data. The measure used to prune the rule is based on
1377	* flag given.
1378	*
1379	* @param pruneData the pruning data used to prune the rule
1380	* @param useWhole flag to indicate whether use the error rate of
1381	* the whole pruning data instead of the data covered
1382	*/
1383	public void prune(Instances pruneData, boolean useWhole){
1384	Instances data = pruneData;
1385
1386	double total = data.sumOfWeights();
1387	if(!Utils.gr(total, 0.0))
1388	return;
1389
1390	/* The default accurate # and rate on pruning data */
1391	double defAccu=computeDefAccu(data);
1392
1393	if(m_Debug)
1394	System.err.println("Pruning with " + defAccu +
1395	" positive data out of " + total +
1396	" instances");
1397
1398	int size=m_Antds.size();
1399	if(size == 0) return; // Default rule before pruning
1400
1401	double[] worthRt = new double[size];
1402	double[] coverage = new double[size];
1403	double[] worthValue = new double[size];
1404	for(int w=0; w<size; w++){
1405	worthRt[w]=coverage[w]=worthValue[w]=0.0;
1406	}
1407
1408	/* Calculate accuracy parameters for all the antecedents in this rule */
1409	double tn = 0.0; // True negative if useWhole
1410	for(int x=0; x<size; x++){
1411	Antd antd=(Antd)m_Antds.elementAt(x);
1412	Instances newData = data;
1413	data = new Instances(newData, 0); // Make data empty
1414
1415	for(int y=0; y<newData.numInstances(); y++){
1416	Instance ins=newData.instance(y);
1417
1418	if(antd.covers(ins)>0){ // Covered by this antecedent
1419	coverage[x] += ins.weight();
1420	data.add(ins); // Add to data for further pruning
1421	if((int)ins.classValue() == (int)m_Consequent) // Accurate prediction
1422	worthValue[x] += ins.weight();
1423	}
1424	else if(useWhole){ // Not covered
1425	if((int)ins.classValue() != (int)m_Consequent)
1426	tn += ins.weight();
1427	}
1428	}
1429
1430	if(useWhole){
1431	worthValue[x] += tn;
1432	worthRt[x] = worthValue[x] / total;
1433	}
1434	else // Note if coverage is 0, accuracy is 0.5
1435	worthRt[x] = (worthValue[x]+1.0)/(coverage[x]+2.0);
1436	}
1437
1438	double maxValue = (defAccu+1.0)/(total+2.0);
1439	int maxIndex = -1;
1440	for(int i=0; i<worthValue.length; i++){
1441	if(m_Debug){
1442	double denom = useWhole ? total : coverage[i];
1443	System.err.println(i+"(useAccuray? "+!useWhole+"): "
1444	+ worthRt[i] +
1445	"="+worthValue[i]+
1446	"/"+denom);
1447	}
1448	if(worthRt[i] > maxValue){ // Prefer to the
1449	maxValue = worthRt[i]; // shorter rule
1450	maxIndex = i;
1451	}
1452	}
1453
1454	if (maxIndex==-1) return;
1455
1456	/* Prune the antecedents according to the accuracy parameters */
1457	for(int z=size-1;z>maxIndex;z--)
1458	m_Antds.removeElementAt(z);
1459	}
1460
1461	/**
1462	* Prints this rule
1463	*
1464	* @param classAttr the class attribute in the data
1465	* @return a textual description of this rule
1466	*/
1467	public String toString(Attribute classAttr) {
1468	StringBuffer text = new StringBuffer();
1469	if(m_Antds.size() > 0){
1470	for(int j=0; j< (m_Antds.size()-1); j++)
1471	text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
1472	text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
1473	}
1474	text.append(" => " + classAttr.name() +
1475	"=" + classAttr.value((int)m_Consequent));
1476
1477	return text.toString();
1478	}
1479
1480	/**
1481	* The fuzzification procedure
1482	* @param data training data
1483	* @param allWeightsAreOne flag whether all instances have weight 1. If this is the case branch-and-bound is possible for speed-up.
1484	*/
1485	public void fuzzify(Instances data, boolean allWeightsAreOne){
1486	// Determine whether there are numeric antecedents that can be fuzzified.
1487	if (m_Antds == null) return;
1488	int numNumericAntds = 0;
1489	for (int i = 0; i < m_Antds.size(); i++){
1490	if (m_Antds.elementAt(i) instanceof NumericAntd)
1491	numNumericAntds++;
1492	}
1493	if (numNumericAntds == 0)
1494	return;
1495
1496	double maxPurity = Double.NEGATIVE_INFINITY;
1497	boolean[] finishedAntecedents = new boolean[m_Antds.size()];
1498	int numFinishedAntecedents = 0;
1499
1500	// Loop until all antecdents have been fuzzified
1501	while (numFinishedAntecedents<m_Antds.size()){
1502	double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY;
1503	int bestAntecedentsIndex = -1;
1504	double bestSupportBoundForAllAntecedents = Double.NaN;
1505
1506	Instances relevantData = new Instances(data,0);
1507	for (int j = 0; j < m_Antds.size(); j++){
1508	if(finishedAntecedents[j]) continue;
1509
1510	relevantData = new Instances (data);
1511	/*
1512	* Remove instances which are not relevant, because they are not covered
1513	* by the _other_ antecedents.
1514	*/
1515	for (int k = 0; k < m_Antds.size(); k++){
1516	if (k==j) continue;
1517	Antd exclusionAntd = ((Antd)m_Antds.elementAt(k));
1518	for (int y = 0; y < relevantData.numInstances(); y++){
1519	if (exclusionAntd.covers(relevantData.instance(y)) == 0){
1520	relevantData.delete(y--);
1521	}
1522	}
1523	}
1524
1525	// test whether this antecedent is numeric and whether there is data for making it fuzzy
1526	if (relevantData.attribute(((Antd)m_Antds.elementAt(j)).att.index()).isNumeric() && relevantData.numInstances()>0){
1527	// Get a working copy of this antecedent
1528	NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy();
1529	currentAntd.fuzzyYet=true;
1530
1531	relevantData.deleteWithMissing(currentAntd.att.index());
1532
1533	double sumOfWeights = relevantData.sumOfWeights();
1534	if(!Utils.gr(sumOfWeights, 0.0))
1535	return;
1536
1537	relevantData.sort(currentAntd.att.index());
1538
1539	double maxPurityForThisAntecedent = 0;
1540	double bestFoundSupportBound = Double.NaN;
1541
1542	double lastAccu = 0;
1543	double lastCover = 0;
1544	// Test all possible edge points
1545	if (currentAntd.value == 0){
1546	for (int k = 1; k < relevantData.numInstances(); k++){
1547	// break the loop if there is no gain (only works when all instances have weight 1)
1548	if ((lastAccu+(relevantData.numInstances()-k-1))/(lastCover+(relevantData.numInstances()-k-1)) < maxPurityForThisAntecedent && allWeightsAreOne){
1549	break;
1550	}
1551
1552	// Bag 1
1553	if (currentAntd.splitPoint < relevantData.instance(k).value(currentAntd.att.index())
1554	&& relevantData.instance(k).value(currentAntd.att.index()) != relevantData.instance(k-1).value(currentAntd.att.index())){
1555	currentAntd.supportBound = relevantData.instance(k).value(currentAntd.att.index());
1556
1557	// Calculate the purity of this fuzzification
1558	double[] accuArray = new double[relevantData.numInstances()];
1559	double[] coverArray = new double[relevantData.numInstances()];
1560	for (int i = 0; i < relevantData.numInstances(); i++){
1561	coverArray[i] = relevantData.instance(i).weight();
1562	double coverValue = currentAntd.covers(relevantData.instance(i));
1563	if (coverArray[i] >= coverValue*relevantData.instance(i).weight()){
1564	coverArray[i] = coverValue*relevantData.instance(i).weight();
1565	if (relevantData.instance(i).classValue() == m_Consequent){
1566	accuArray[i] = coverValue*relevantData.instance(i).weight();
1567	}
1568	}
1569	}
1570
1571	// Test whether this fuzzification is the best one for this antecedent.
1572	// Keep it if this is the case.
1573	double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
1574	if (purity >= maxPurityForThisAntecedent){
1575	maxPurityForThisAntecedent =purity;
1576	bestFoundSupportBound = currentAntd.supportBound;
1577	}
1578	lastAccu = Utils.sum(accuArray);
1579	lastCover = Utils.sum(coverArray);
1580	}
1581	}
1582	}else{
1583	for (int k = relevantData.numInstances()-2; k >=0; k--){
1584	// break the loop if there is no gain (only works when all instances have weight 1)
1585	if ((lastAccu+(k))/(lastCover+(k)) < maxPurityForThisAntecedent && allWeightsAreOne){
1586	break;
1587	}
1588
1589	//Bag 2
1590	if (currentAntd.splitPoint > relevantData.instance(k).value(currentAntd.att.index())
1591	&& relevantData.instance(k).value(currentAntd.att.index()) != relevantData.instance(k+1).value(currentAntd.att.index())){
1592	currentAntd.supportBound = relevantData.instance(k).value(currentAntd.att.index());
1593
1594	// Calculate the purity of this fuzzification
1595	double[] accuArray = new double[relevantData.numInstances()];
1596	double[] coverArray = new double[relevantData.numInstances()];
1597	for (int i = 0; i < relevantData.numInstances(); i++){
1598	coverArray[i] = relevantData.instance(i).weight();
1599	double coverValue = currentAntd.covers(relevantData.instance(i));
1600	if (coverArray[i] >= coverValue*relevantData.instance(i).weight()){
1601	coverArray[i] = coverValue*relevantData.instance(i).weight();
1602	if (relevantData.instance(i).classValue() == m_Consequent){
1603	accuArray[i] = coverValue*relevantData.instance(i).weight();
1604	}
1605	}
1606	}
1607
1608	// Test whether this fuzzification is the best one for this antecedent.
1609	// Keep it if this is the case.
1610	double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
1611	if (purity >= maxPurityForThisAntecedent){
1612	maxPurityForThisAntecedent =purity;
1613	bestFoundSupportBound = currentAntd.supportBound;
1614	}
1615	lastAccu = Utils.sum(accuArray);
1616	lastCover = Utils.sum(coverArray);
1617	}
1618	}
1619
1620	}
1621
1622	// Test whether the best fuzzification for this antecedent is the best one of all
1623	// antecedents considered so far.
1624	// Keep it if this is the case.
1625	if (maxPurityForThisAntecedent>maxPurityOfAllAntecedents){
1626	bestAntecedentsIndex = j;
1627	bestSupportBoundForAllAntecedents = bestFoundSupportBound;
1628	maxPurityOfAllAntecedents = maxPurityForThisAntecedent;
1629	}
1630	}else{
1631	// Deal with a nominal antecedent.
1632	// Since there is no fuzzification it is already finished.
1633	finishedAntecedents[j] = true;
1634	numFinishedAntecedents++;
1635	continue;
1636	}
1637	}
1638
1639	// Make the fuzzification step for the current antecedent real.
1640	if (maxPurity <= maxPurityOfAllAntecedents){
1641	if (Double.isNaN(bestSupportBoundForAllAntecedents)){
1642	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).supportBound = ((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).splitPoint;
1643	}else{
1644	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).supportBound = bestSupportBoundForAllAntecedents;
1645	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).fuzzyYet = true;
1646	}
1647	maxPurity = maxPurityOfAllAntecedents;
1648	}
1649	finishedAntecedents[bestAntecedentsIndex] = true;
1650	numFinishedAntecedents++;
1651	}
1652
1653	}
1654
1655	/**
1656	* Calculation of the rule weights / confidences for all beginning rule stumps.
1657	* @param data The training data
1658	*/
1659	public void calculateConfidences(Instances data) {
1660	RipperRule tempRule = (RipperRule) this.copy();
1661
1662	while(tempRule.hasAntds()){
1663	double acc = 0;
1664	double cov = 0;
1665	for (int i = 0; i < data.numInstances(); i++){
1666	double membershipValue = tempRule.coverageDegree(data.instance(i)) * data.instance(i).weight();
1667	cov += membershipValue;
1668	if (m_Consequent == data.instance(i).classValue()){
1669	acc += membershipValue;
1670	}
1671	}
1672
1673	// m-estimate
1674	double m = 2.0;
1675	((Antd)this.m_Antds.elementAt((int)tempRule.size()-1)).m_confidence =
1676	(acc+m*(aprioriDistribution[(int)m_Consequent]/
1677	Utils.sum(aprioriDistribution))) / (cov+m);
1678	tempRule.m_Antds.removeElementAt(tempRule.m_Antds.size()-1);
1679	}
1680	}
1681
1682
1683	/**
1684	* Get the rule confidence.
1685	* @return rule confidence / weight
1686	*/
1687	public double getConfidence(){
1688	if (!hasAntds())
1689	return Double.NaN;
1690	return ((Antd)m_Antds.lastElement()).m_confidence;
1691	}
1692
1693	/**
1694	*
1695	*/
1696	public String getRevision() {
1697	return "1.0";
1698	}
1699
1700	}
1701
1702	/**
1703	* Returns default capabilities of the classifier.
1704	*
1705	* @return the capabilities of this classifier
1706	*/
1707	public Capabilities getCapabilities() {
1708	Capabilities result = super.getCapabilities();
1709	result.disableAll();
1710
1711	// attributes
1712	result.enable(Capability.NOMINAL_ATTRIBUTES);
1713	result.enable(Capability.NUMERIC_ATTRIBUTES);
1714	result.enable(Capability.DATE_ATTRIBUTES);
1715	result.enable(Capability.MISSING_VALUES);
1716
1717	// class
1718	result.enable(Capability.NOMINAL_CLASS);
1719	result.enable(Capability.MISSING_CLASS_VALUES);
1720
1721	// instances
1722	result.setMinimumNumberInstances(m_Folds);
1723
1724	return result;
1725	}
1726
1727	/**
1728	* Builds the FURIA rule-based model
1729	*
1730	* @param instances the training data
1731	* @throws Exception if classifier can't be built successfully
1732	*/
1733	public void buildClassifier(Instances instances) throws Exception {
1734	// can classifier handle the data?
1735	getCapabilities().testWithFail(instances);
1736
1737	// remove instances with missing class
1738	instances = new Instances(instances);
1739	instances.deleteWithMissingClass();
1740
1741	// Learn the apriori distribution for later
1742	aprioriDistribution = new double[instances.classAttribute().numValues()];
1743	boolean allWeightsAreOne = true;
1744	for (int i = 0 ; i < instances.numInstances(); i++){
1745	aprioriDistribution[(int)instances.instance(i).classValue()]+=instances.instance(i).weight();
1746	if (allWeightsAreOne && instances.instance(i).weight() != 1.0){
1747	allWeightsAreOne = false;
1748	break;
1749	}
1750	}
1751
1752
1753	m_Random = instances.getRandomNumberGenerator(m_Seed);
1754	m_Total = RuleStats.numAllConditions(instances);
1755	if(m_Debug)
1756	System.err.println("Number of all possible conditions = "+m_Total);
1757
1758	Instances data = new Instances(instances);
1759
1760	m_Class = data.classAttribute();
1761	m_Ruleset = new FastVector();
1762	m_RulesetStats = new FastVector();
1763	m_Distributions = new FastVector();
1764
1765
1766	// Learn a rule set for each single class
1767	oneClass:
1768	for(int y=0; y < data.numClasses(); y++){ // For each class
1769
1770	double classIndex = (double)y;
1771	if(m_Debug){
1772	int ci = (int)classIndex;
1773	System.err.println("\n\nClass "+m_Class.value(ci)+"("+ci+"): "
1774	+ aprioriDistribution[y] + "instances\n"+
1775	"=====================================\n");
1776	}
1777
1778	if(Utils.eq(aprioriDistribution[y],0.0)) // No data for this class
1779	continue oneClass;
1780
1781	// The expected FP/err is the proportion of the class
1782	double expFPRate = (aprioriDistribution[y] / Utils.sum(aprioriDistribution));
1783
1784
1785	double classYWeights = 0, totalWeights = 0;
1786	for(int j=0; j < data.numInstances(); j++){
1787	Instance datum = data.instance(j);
1788	totalWeights += datum.weight();
1789	if((int)datum.classValue() == y){
1790	classYWeights += datum.weight();
1791	}
1792	}
1793
1794	// DL of default rule, no theory DL, only data DL
1795	double defDL;
1796	if(classYWeights > 0)
1797	defDL = RuleStats.dataDL(expFPRate,
1798	0.0,
1799	totalWeights,
1800	0.0,
1801	classYWeights);
1802	else
1803	continue oneClass; // Subsumed by previous rules
1804
1805
1806
1807	if(Double.isNaN(defDL) \|\| Double.isInfinite(defDL))
1808	throw new Exception("Should never happen: "+
1809	"defDL NaN or infinite!");
1810	if(m_Debug)
1811	System.err.println("The default DL = "+defDL);
1812
1813	rulesetForOneClass(expFPRate, data, classIndex, defDL);
1814	}
1815
1816	// Remove redundant antecedents
1817	for(int z=0; z < m_Ruleset.size(); z++){
1818	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
1819	for(int j = 0; j < rule.m_Antds.size(); j++){
1820	Antd outerAntd = (Antd)rule.m_Antds.elementAt(j);
1821	for (int k = j+1; k < rule.m_Antds.size(); k++){
1822	Antd innerAntd = (Antd)rule.m_Antds.elementAt(k);
1823	if (outerAntd.att.index() == innerAntd.att.index() && outerAntd.value==innerAntd.value){
1824	rule.m_Antds.setElementAt(rule.m_Antds.elementAt(k), j);
1825	rule.m_Antds.removeElementAt(k--);
1826	}
1827	}
1828	}
1829	}
1830
1831
1832	// Fuzzify all rules
1833	for(int z=0; z < m_RulesetStats.size(); z++){
1834	RuleStats oneClass = (RuleStats)m_RulesetStats.elementAt(z);
1835	for(int xyz=0; xyz < oneClass.getRulesetSize(); xyz++){
1836	RipperRule rule = (RipperRule)((FastVector)oneClass.getRuleset()).elementAt(xyz);
1837
1838	// do the fuzzification for all known antecedents
1839	rule.fuzzify(data, allWeightsAreOne);
1840
1841	double[] classDist = oneClass.getDistributions(xyz);
1842	// Check for sum=0, because otherwise it does not work
1843	if (Utils.sum(classDist)>0) Utils.normalize(classDist);
1844	if(classDist != null)
1845	m_Distributions.addElement(classDist);
1846	}
1847	}
1848
1849
1850	// if there was some problem during fuzzification, set the support bound
1851	// to the trivial fuzzification position
1852	for(int z=0; z < m_Ruleset.size(); z++){
1853	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
1854	for(int j = 0; j < rule.m_Antds.size(); j++){
1855	Antd antd = (Antd)rule.m_Antds.elementAt(j);
1856	if (antd instanceof NumericAntd) {
1857	NumericAntd numAntd = (NumericAntd) antd;
1858
1859
1860	if (!numAntd.fuzzyYet){
1861	for (int i = 0; i < data.numInstances(); i++){
1862	if ((numAntd.value == 1 &&
1863	numAntd.splitPoint > data.instance(i).value(numAntd.att.index()) &&
1864	(numAntd.supportBound < data.instance(i).value(numAntd.att.index()) \|\|
1865	!numAntd.fuzzyYet)
1866	)
1867	\|\|
1868	(numAntd.value == 0 &&
1869	numAntd.splitPoint < data.instance(i).value(numAntd.att.index()) &&
1870	(numAntd.supportBound > data.instance(i).value(numAntd.att.index()) \|\|
1871	!numAntd.fuzzyYet)
1872	)
1873	){
1874	numAntd.supportBound = data.instance(i).value(numAntd.att.index());
1875	numAntd.fuzzyYet = true;
1876	}
1877	}
1878
1879	}
1880	}
1881	}
1882	}
1883
1884	//Determine confidences
1885	for(int z=0; z < m_Ruleset.size(); z++){
1886	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
1887	rule.calculateConfidences(data);
1888	}
1889	}
1890
1891	/**
1892	* Classify the test instance with the rule learner and provide
1893	* the class distributions
1894	*
1895	* @param datum the instance to be classified
1896	* @return the distribution
1897	* @throws Exception
1898	*/
1899
1900	public double[] distributionForInstance(Instance datum) throws Exception{
1901	//test for multiple overlap of rules
1902	double[] rulesCoveringForEachClass = new double[datum.numClasses()];
1903	for(int i=0; i < m_Ruleset.size(); i++){
1904	RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
1905
1906	/* In case that one class does not contain any instances (e.g. in UCI-dataset glass),
1907	* a default rule assigns all instances to the other class. Such a rule may be ignored here.
1908	*/
1909	if (!rule.hasAntds())
1910	continue;
1911
1912
1913	// Calculate the maximum degree of coverage
1914	if(rule.covers(datum)){
1915	rulesCoveringForEachClass[(int)rule.m_Consequent] += rule.coverageDegree(datum) * rule.getConfidence();
1916	}
1917
1918	}
1919
1920
1921	// If no rule covered the example, then maybe start the rule stretching
1922	if (Utils.sum(rulesCoveringForEachClass)==0){
1923
1924	// If rule stretching is not allowed,
1925	// return either the apriori prediction
1926	if (m_uncovAction == UNCOVACTION_APRIORI){
1927	rulesCoveringForEachClass = aprioriDistribution;
1928	if (Utils.sum(rulesCoveringForEachClass)>0)
1929	Utils.normalize(rulesCoveringForEachClass);
1930	return rulesCoveringForEachClass;
1931	}
1932	// or abstain from that decision at all.
1933	if (m_uncovAction == UNCOVACTION_REJECT)
1934	return rulesCoveringForEachClass;
1935
1936	// Copy the ruleset as backup
1937	FastVector origRuleset = (FastVector) m_Ruleset.copyElements();
1938
1939	// Find for every rule the first antecedent that does not
1940	// cover the given instance.
1941	rulesCoveringForEachClass = new double[rulesCoveringForEachClass.length];
1942	for(int i=0; i < m_Ruleset.size(); i++){
1943	RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
1944	double numAntdsBefore = rule.m_Antds.size();
1945
1946	int firstAntdToDelete = Integer.MAX_VALUE;
1947	for (int j = 0; j < rule.m_Antds.size(); j++){
1948	if (((Antd)rule.m_Antds.elementAt(j)).covers(datum)==0){
1949	firstAntdToDelete = j;
1950	break;
1951	}
1952	}
1953
1954	// Prune antecedent such that it covers the instance
1955	for (int j = firstAntdToDelete; j < rule.m_Antds.size(); j++){
1956	rule.m_Antds.removeElementAt(j--);
1957	}
1958	double numAntdsAfter = rule.m_Antds.size();
1959
1960	// Empty rules shall not vote here
1961	if (!rule.hasAntds())
1962	continue;
1963
1964	// Calculate the maximum degree of coverage and weight the rule
1965	// by its confidence and the fraction of antecedents left after
1966	// rule stretching
1967	double secondWeight = (numAntdsAfter+1)/(numAntdsBefore+2) ;
1968	if (rule.getConfidence() secondWeightrule.coverageDegree(datum) >= rulesCoveringForEachClass[(int)rule.getConsequent()]){
1969	rulesCoveringForEachClass[(int)rule.getConsequent()] = rule.getConfidence()secondWeightrule.coverageDegree(datum);
1970	}
1971	}
1972
1973	// Reestablish original ruleset
1974	m_Ruleset = origRuleset;
1975	}
1976
1977	//check for conflicts
1978	double[] maxClasses = new double[rulesCoveringForEachClass.length];
1979	for (int i = 0; i < rulesCoveringForEachClass.length; i++){
1980	if (rulesCoveringForEachClass[Utils.maxIndex(rulesCoveringForEachClass)] ==
1981	rulesCoveringForEachClass[i] && rulesCoveringForEachClass[i]>0)
1982	maxClasses[i] = 1;
1983	}
1984
1985	//If there is a conflict, resolve it using the apriori distribution
1986	if (Utils.sum(maxClasses)>0){
1987	for (int i = 0; i < maxClasses.length; i++){
1988	if (maxClasses[i] > 0 && aprioriDistribution[i] != rulesCoveringForEachClass[Utils.maxIndex(rulesCoveringForEachClass)])
1989	rulesCoveringForEachClass[i] -= 0.00001;
1990	}
1991	}
1992
1993	// If no stretched rule was able to cover the instance,
1994	// then fall back to the apriori distribution
1995	if (Utils.sum(rulesCoveringForEachClass)==0){
1996	rulesCoveringForEachClass = aprioriDistribution;
1997	}
1998
1999
2000	if (Utils.sum(rulesCoveringForEachClass)>0)
2001	Utils.normalize(rulesCoveringForEachClass);
2002
2003	return rulesCoveringForEachClass;
2004
2005	}
2006
2007
2008	/** Build a ruleset for the given class according to the given data
2009	*
2010	* @param expFPRate the expected FP/(FP+FN) used in DL calculation
2011	* @param data the given data
2012	* @param classIndex the given class index
2013	* @param defDL the default DL in the data
2014	* @throws Exception if the ruleset can be built properly
2015	*/
2016	protected Instances rulesetForOneClass(double expFPRate,
2017	Instances data,
2018	double classIndex,
2019	double defDL)
2020	throws Exception {
2021
2022	Instances newData = data, growData, pruneData;
2023	boolean stop = false;
2024	FastVector ruleset = new FastVector();
2025
2026	double dl = defDL, minDL = defDL;
2027	RuleStats rstats = null;
2028	double[] rst;
2029
2030	// Check whether data have positive examples
2031	boolean defHasPositive = true; // No longer used
2032	boolean hasPositive = defHasPositive;
2033
2034	/******************** Building stage *********************/
2035	if(m_Debug)
2036	System.err.println("\n* Building stage *");
2037
2038
2039	while((!stop) && hasPositive){ // Generate new rules until
2040	// stopping criteria met
2041	RipperRule oneRule;
2042
2043	oneRule = new RipperRule();
2044	oneRule.setConsequent(classIndex); // Must set first
2045	if(m_Debug)
2046	System.err.println("\nNo pruning: growing a rule ...");
2047	oneRule.grow(newData); // Build the rule
2048	if(m_Debug)
2049	System.err.println("No pruning: one rule found:\n"+
2050	oneRule.toString(m_Class));
2051
2052
2053	// Compute the DL of this ruleset
2054	if(rstats == null){ // First rule
2055	rstats = new RuleStats();
2056	rstats.setNumAllConds(m_Total);
2057	rstats.setData(newData);
2058	}
2059
2060	rstats.addAndUpdate(oneRule);
2061	int last = rstats.getRuleset().size()-1; // Index of last rule
2062	dl += rstats.relativeDL(last, expFPRate, m_CheckErr);
2063
2064	if(Double.isNaN(dl) \|\| Double.isInfinite(dl))
2065	throw new Exception("Should never happen: dl in "+
2066	"building stage NaN or infinite!");
2067	if(m_Debug)
2068	System.err.println("Before optimization("+last+
2069	"): the dl = "+dl+" \| best: "+minDL);
2070
2071	if(dl < minDL)
2072	minDL = dl; // The best dl so far
2073
2074	rst = rstats.getSimpleStats(last);
2075	if(m_Debug)
2076	System.err.println("The rule covers: "+rst[0]+
2077	" \| pos = " + rst[2] +
2078	" \| neg = " + rst[4]+
2079	"\nThe rule doesn't cover: "+rst[1]+
2080	" \| pos = " + rst[5]);
2081
2082	stop = checkStop(rst, minDL, dl);
2083
2084	if(!stop){
2085	ruleset.addElement(oneRule); // Accepted
2086	newData = rstats.getFiltered(last)[1];// Data not covered
2087	hasPositive = Utils.gr(rst[5], 0.0); // Positives remaining?
2088	if(m_Debug)
2089	System.err.println("One rule added: has positive? "
2090	+hasPositive);
2091	}
2092	else{
2093	if(m_Debug)
2094	System.err.println("Quit rule");
2095	rstats.removeLast(); // Remove last to be re-used
2096	}
2097	}// while !stop
2098
2099
2100	/****************** Optimization stage *****************/
2101
2102	RuleStats finalRulesetStat = null;
2103	for(int z=0; z < m_Optimizations; z++){
2104	if(m_Debug)
2105	System.err.println("\n*** Optimization: run #"
2106	+z+" ***");
2107
2108	newData = data;
2109	finalRulesetStat = new RuleStats();
2110	finalRulesetStat.setData(newData);
2111	finalRulesetStat.setNumAllConds(m_Total);
2112	int position=0;
2113	stop = false;
2114	boolean isResidual = false;
2115	hasPositive = defHasPositive;
2116	dl = minDL = defDL;
2117
2118	oneRule:
2119	while(!stop && hasPositive){
2120
2121	isResidual = (position>=ruleset.size()); // Cover residual positive examples
2122	// Re-do shuffling and stratification
2123	//newData.randomize(m_Random);
2124	newData = RuleStats.stratify(newData, m_Folds, m_Random);
2125	Instances[] part = RuleStats.partition(newData, m_Folds);
2126	growData=part[0];
2127	pruneData=part[1];
2128	//growData=newData.trainCV(m_Folds, m_Folds-1);
2129	//pruneData=newData.testCV(m_Folds, m_Folds-1);
2130	RipperRule finalRule;
2131
2132	if(m_Debug)
2133	System.err.println("\nRule #"+position +
2134	"\| isResidual?" + isResidual+
2135	"\| data size: "+newData.sumOfWeights());
2136
2137	if(isResidual){
2138	RipperRule newRule = new RipperRule();
2139	newRule.setConsequent(classIndex);
2140	if(m_Debug)
2141	System.err.println("\nGrowing and pruning"+
2142	" a new rule ...");
2143	newRule.grow(newData);
2144	finalRule = newRule;
2145	if(m_Debug)
2146	System.err.println("\nNew rule found: "+
2147	newRule.toString(m_Class));
2148	}
2149	else{
2150	RipperRule oldRule = (RipperRule)ruleset.elementAt(position);
2151	boolean covers = false;
2152	// Test coverage of the next old rule
2153	for(int i=0; i<newData.numInstances(); i++)
2154	if(oldRule.covers(newData.instance(i))){
2155	covers = true;
2156	break;
2157	}
2158
2159	if(!covers){// Null coverage, no variants can be generated
2160	finalRulesetStat.addAndUpdate(oldRule);
2161	position++;
2162	continue oneRule;
2163	}
2164
2165	// 2 variants
2166	if(m_Debug)
2167	System.err.println("\nGrowing and pruning"+
2168	" Replace ...");
2169	RipperRule replace = new RipperRule();
2170	replace.setConsequent(classIndex);
2171	replace.grow(growData);
2172
2173	// Remove the pruning data covered by the following
2174	// rules, then simply compute the error rate of the
2175	// current rule to prune it. According to Ripper,
2176	// it's equivalent to computing the error of the
2177	// whole ruleset -- is it true?
2178	pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);
2179	replace.prune(pruneData, true);
2180
2181	if(m_Debug)
2182	System.err.println("\nGrowing and pruning"+
2183	" Revision ...");
2184	RipperRule revision = (RipperRule)oldRule.copy();
2185
2186	// For revision, first rm the data covered by the old rule
2187	Instances newGrowData = new Instances(growData, 0);
2188	for(int b=0; b<growData.numInstances(); b++){
2189	Instance inst = growData.instance(b);
2190	if(revision.covers(inst))
2191	newGrowData.add(inst);
2192	}
2193	revision.grow(newGrowData);
2194	revision.prune(pruneData, true);
2195
2196	double[][] prevRuleStats = new double[position][6];
2197	for(int c=0; c < position; c++)
2198	prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);
2199
2200	// Now compare the relative DL of variants
2201	FastVector tempRules = (FastVector)ruleset.copyElements();
2202	tempRules.setElementAt(replace, position);
2203
2204	RuleStats repStat = new RuleStats(data, tempRules);
2205	repStat.setNumAllConds(m_Total);
2206	repStat.countData(position, newData, prevRuleStats);
2207	//repStat.countData();
2208	rst = repStat.getSimpleStats(position);
2209	if(m_Debug)
2210	System.err.println("Replace rule covers: "+rst[0]+
2211	" \| pos = " + rst[2] +
2212	" \| neg = " + rst[4]+
2213	"\nThe rule doesn't cover: "+rst[1]+
2214	" \| pos = " + rst[5]);
2215
2216	double repDL = repStat.relativeDL(position, expFPRate,
2217	m_CheckErr);
2218
2219	if(m_Debug)
2220	System.err.println("\nReplace: "+
2221	replace.toString(m_Class)
2222	+" \|dl = "+repDL);
2223
2224	if(Double.isNaN(repDL) \|\| Double.isInfinite(repDL))
2225	throw new Exception("Should never happen: repDL"+
2226	"in optmz. stage NaN or "+
2227	"infinite!");
2228
2229	tempRules.setElementAt(revision, position);
2230	RuleStats revStat = new RuleStats(data, tempRules);
2231	revStat.setNumAllConds(m_Total);
2232	revStat.countData(position, newData, prevRuleStats);
2233	//revStat.countData();
2234	double revDL = revStat.relativeDL(position, expFPRate,
2235	m_CheckErr);
2236
2237	if(m_Debug)
2238	System.err.println("Revision: "
2239	+ revision.toString(m_Class)
2240	+" \|dl = "+revDL);
2241
2242	if(Double.isNaN(revDL) \|\| Double.isInfinite(revDL))
2243	throw new Exception("Should never happen: revDL"+
2244	"in optmz. stage NaN or "+
2245	"infinite!");
2246
2247	rstats = new RuleStats(data, ruleset);
2248	rstats.setNumAllConds(m_Total);
2249	rstats.countData(position, newData, prevRuleStats);
2250	//rstats.countData();
2251	double oldDL = rstats.relativeDL(position, expFPRate,
2252	m_CheckErr);
2253
2254	if(Double.isNaN(oldDL) \|\| Double.isInfinite(oldDL))
2255	throw new Exception("Should never happen: oldDL"+
2256	"in optmz. stage NaN or "+
2257	"infinite!");
2258	if(m_Debug)
2259	System.err.println("Old rule: "+
2260	oldRule.toString(m_Class)
2261	+" \|dl = "+oldDL);
2262
2263	if(m_Debug)
2264	System.err.println("\nrepDL: "+repDL+
2265	"\nrevDL: "+revDL+
2266	"\noldDL: "+oldDL);
2267
2268	if((oldDL <= revDL) && (oldDL <= repDL))
2269	finalRule = oldRule; // Old the best
2270	else if(revDL <= repDL)
2271	finalRule = revision; // Revision the best
2272	else
2273	finalRule = replace; // Replace the best
2274	}
2275
2276	finalRulesetStat.addAndUpdate(finalRule);
2277	rst = finalRulesetStat.getSimpleStats(position);
2278
2279	if(isResidual){
2280	dl += finalRulesetStat.relativeDL(position,
2281	expFPRate,
2282	m_CheckErr);
2283
2284	if(m_Debug)
2285	System.err.println("After optimization: the dl"
2286	+"="+dl+" \| best: "+minDL);
2287
2288	if(dl < minDL)
2289	minDL = dl; // The best dl so far
2290
2291	stop = checkStop(rst, minDL, dl);
2292	if(!stop)
2293	ruleset.addElement(finalRule); // Accepted
2294	else{
2295	finalRulesetStat.removeLast(); // Remove last to be re-used
2296	position--;
2297	}
2298	}
2299	else
2300	ruleset.setElementAt(finalRule, position); // Accepted
2301
2302	if(m_Debug){
2303	System.err.println("The rule covers: "+rst[0]+
2304	" \| pos = " + rst[2] +
2305	" \| neg = " + rst[4]+
2306	"\nThe rule doesn't cover: "+rst[1]+
2307	" \| pos = " + rst[5]);
2308	System.err.println("\nRuleset so far: ");
2309	for(int x=0; x<ruleset.size(); x++)
2310	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
2311	System.err.println();
2312	}
2313
2314	//Data not covered
2315	if(finalRulesetStat.getRulesetSize() > 0)// If any rules
2316	newData = finalRulesetStat.getFiltered(position)[1];
2317	hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining?
2318	position++;
2319	} // while !stop && hasPositive
2320
2321	if(ruleset.size() > (position+1)){ // Hasn't gone through yet
2322	for(int k=position+1; k<ruleset.size(); k++)
2323	finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k));
2324	}
2325	if(m_Debug)
2326	System.err.println("\nDeleting rules to decrease"+
2327	" DL of the whole ruleset ...");
2328	finalRulesetStat.reduceDL(expFPRate, m_CheckErr);
2329
2330	if(m_Debug){
2331	int del = ruleset.size() -
2332	finalRulesetStat.getRulesetSize();
2333	System.err.println(del+" rules are deleted"+
2334	" after DL reduction procedure");
2335	}
2336	ruleset = finalRulesetStat.getRuleset();
2337	rstats = finalRulesetStat;
2338
2339	} // For each run of optimization
2340
2341	// Concatenate the ruleset for this class to the whole ruleset
2342	if(m_Debug){
2343	System.err.println("\nFinal ruleset: ");
2344	for(int x=0; x<ruleset.size(); x++)
2345	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
2346	System.err.println();
2347	}
2348
2349
2350	m_Ruleset.appendElements(ruleset);
2351	m_RulesetStats.addElement(rstats);
2352
2353	return null;
2354	}
2355
2356	/**
2357	* Check whether the stopping criterion meets
2358	*
2359	* @param rst the statistic of the ruleset
2360	* @param minDL the min description length so far
2361	* @param dl the current description length of the ruleset
2362	* @return true if stop criterion meets, false otherwise
2363	*/
2364	private boolean checkStop(double[] rst, double minDL, double dl){
2365
2366
2367	if(dl > minDL+MAX_DL_SURPLUS){
2368	if(m_Debug)
2369	System.err.println("DL too large: "+dl+" \| "+minDL);
2370	return true;
2371	}
2372	else
2373	if(!Utils.gr(rst[2], 0.0)){// Covered positives
2374	if(m_Debug)
2375	System.err.println("Too few positives.");
2376	return true;
2377	}
2378	else if((rst[4]/rst[0]) >= 0.5){// Err rate
2379	if(m_CheckErr){
2380	if(m_Debug)
2381	System.err.println("Error too large: "+
2382	rst[4] + "/" + rst[0]);
2383	return true;
2384	}
2385	else
2386	return false;
2387	}
2388	else{// Not stops
2389	if(m_Debug)
2390	System.err.println("Continue.");
2391	return false;
2392	}
2393	}
2394
2395	/**
2396	* Prints the all the rules of the rule learner.
2397	*
2398	* @return a textual description of the classifier
2399	*/
2400	public String toString() {
2401	if (m_Ruleset == null)
2402	return "FURIA: No model built yet.";
2403
2404	StringBuffer sb = new StringBuffer("FURIA rules:\n"+
2405	"===========\n\n");
2406	for(int j=0; j<m_RulesetStats.size(); j++){
2407	RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j);
2408	FastVector rules = rs.getRuleset();
2409	for(int k=0; k<rules.size(); k++){
2410	sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class)
2411	+ " (CF = " + Math.round(100.0*((RipperRule)rules.elementAt(k)).getConfidence())/100.0 +")\n");
2412	}
2413	}
2414	if(m_Debug){
2415	System.err.println("Inside m_Ruleset");
2416	for(int i=0; i<m_Ruleset.size(); i++)
2417	System.err.println(((RipperRule)m_Ruleset.elementAt(i)).toString(m_Class));
2418	}
2419	sb.append("\nNumber of Rules : "
2420	+ m_Ruleset.size() + "\n");
2421
2422
2423
2424
2425	return sb.toString();
2426	}
2427
2428	/**
2429	* Main method.
2430	*
2431	* @param args the options for the classifier
2432	* @throws Exception
2433	*/
2434	public static void main(String[] args) throws Exception {
2435	runClassifier(new FURIA(), args);
2436	}
2437
2438	/**
2439	*
2440	*/
2441	public String getRevision() {
2442	return "$Revision: 5964 $";
2443	}
2444	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: