Context Navigation

source: src/main/java/weka/classifiers/rules/FURIA.java @ 18

Last change on this file since 18 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 85.4 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* FURIA.java
	19	* Copyright (C) 2008,2009 Jens Christian Huehn
	20	*
	21	* (based upon) JRip.java
	22	* Copyright (C) 2001 Xin Xu, Eibe Frank
	23	*/
	24
	25	package weka.classifiers.rules;
	26
	27	import java.io.Serializable;
	28	import java.util.Enumeration;
	29	import java.util.Random;
	30	import java.util.Vector;
	31
	32	import weka.classifiers.AbstractClassifier;
	33	import weka.core.AdditionalMeasureProducer;
	34	import weka.core.Attribute;
	35	import weka.core.Capabilities;
	36	import weka.core.Copyable;
	37	import weka.core.FastVector;
	38	import weka.core.Instance;
	39	import weka.core.Instances;
	40	import weka.core.Option;
	41	import weka.core.OptionHandler;
	42	import weka.core.SelectedTag;
	43	import weka.core.Tag;
	44	import weka.core.TechnicalInformation;
	45	import weka.core.TechnicalInformationHandler;
	46	import weka.core.Utils;
	47	import weka.core.WeightedInstancesHandler;
	48	import weka.core.Capabilities.Capability;
	49	import weka.core.TechnicalInformation.Field;
	50	import weka.core.TechnicalInformation.Type;
	51
	52	/**
	53	<!-- globalinfo-start -->
	54	* FURIA: Fuzzy Unordered Rule Induction Algorithm<br/>
	55	* <br/>
	56	* Details please see:<br/>
	57	* <br/>
	58	* Jens Christian Huehn, Eyke Huellermeier (2009). FURIA: An Algorithm for Unordered Fuzzy Rule Induction. Data Mining and Knowledge Discovery..<br/>
	59	* <br/>
	60	* <p/>
	61	<!-- globalinfo-end -->
	62	*
	63	<!-- technical-bibtex-start -->
	64	* BibTeX:
	65	* <pre>
	66	* @article{Huehn2009,
	67	* author = {Jens Christian Huehn and Eyke Huellermeier},
	68	* journal = {Data Mining and Knowledge Discovery},
	69	* title = {FURIA: An Algorithm for Unordered Fuzzy Rule Induction},
	70	* year = {2009}
	71	* }
	72	* </pre>
	73	* <p/>
	74	<!-- technical-bibtex-end -->
	75	*
	76	<!-- options-start -->
	77	* Valid options are: <p/>
	78	*
	79	* <pre> -F <number of folds>
	80	* Set number of folds for REP
	81	* One fold is used as pruning set.
	82	* (default 3)</pre>
	83	*
	84	* <pre> -N <min. weights>
	85	* Set the minimal weights of instances
	86	* within a split.
	87	* (default 2.0)</pre>
	88	*
	89	* <pre> -O <number of runs>
	90	* Set the number of runs of
	91	* optimizations. (Default: 2)</pre>
	92	*
	93	* <pre> -D
	94	* Set whether turn on the
	95	* debug mode (Default: false)</pre>
	96	*
	97	* <pre> -S <seed>
	98	* The seed of randomization
	99	* (Default: 1)</pre>
	100	*
	101	* <pre> -E
	102	* Whether NOT check the error rate>=0.5
	103	* in stopping criteria (default: check)</pre>
	104	*
	105	* <pre> -s
	106	* The action performed for uncovered instances.
	107	* (default: use stretching)</pre>
	108	*
	109	* <pre> -p
	110	* The T-norm used as fuzzy AND-operator.
	111	* (default: Product T-norm)</pre>
	112	*
	113	<!-- options-end -->
	114	*
	115	* @author Jens Christian Hühn (huehn@gmx.net)
	116	* @author Xin Xu (xx5@cs.waikato.ac.nz)
	117	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
	118	* @version $Revision: 5964 $
	119	*/
	120	public class FURIA
	121	extends AbstractClassifier
	122	implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler,
	123	TechnicalInformationHandler {
	124
	125	/** for serialization */
	126	static final long serialVersionUID = -6589312996832147161L;
	127
	128	/** The limit of description length surplus in ruleset generation */
	129	private static double MAX_DL_SURPLUS = 64.0;
	130
	131	/** The class attribute of the data*/
	132	private Attribute m_Class;
	133
	134	/** The ruleset */
	135	private FastVector m_Ruleset;
	136
	137	/** The predicted class distribution */
	138	private FastVector m_Distributions;
	139
	140	/** Runs of optimizations */
	141	private int m_Optimizations = 2;
	142
	143	/** Random object used in this class */
	144	private Random m_Random = null;
	145
	146	/** # of all the possible conditions in a rule */
	147	private double m_Total = 0;
	148
	149	/** The seed to perform randomization */
	150	private long m_Seed = 1;
	151
	152	/** The number of folds to split data into Grow and Prune for IREP */
	153	private int m_Folds = 3;
	154
	155	/** The minimal number of instance weights within a split*/
	156	private double m_MinNo = 2.0;
	157
	158	/** Whether in a debug mode */
	159	private boolean m_Debug = false;
	160
	161	/** Whether check the error rate >= 0.5 in stopping criteria */
	162	private boolean m_CheckErr = true;
	163
	164	/** The class distribution of the training data*/
	165	private double[] aprioriDistribution;
	166
	167	/** The RuleStats for the ruleset of each class value */
	168	private FastVector m_RulesetStats;
	169
	170
	171	/** What to do if instance is uncovered */
	172	private int m_uncovAction = UNCOVACTION_STRETCH;
	173
	174	/** An uncovered instance is covered using rule stretching. */
	175	private static final int UNCOVACTION_STRETCH = 0;
	176
	177	/** An uncovered instance is classified according to the training data class distribution. */
	178	private static final int UNCOVACTION_APRIORI = 1;
	179
	180	/** An uncovered instance is not classified at all. */
	181	private static final int UNCOVACTION_REJECT = 2;
	182
	183	/** The tags explaining the uncovered action. */
	184	private static final Tag [] TAGS_UNCOVACTION = {
	185	new Tag(UNCOVACTION_STRETCH, "Apply rule stretching (standard)"),
	186	new Tag(UNCOVACTION_APRIORI, "Vote for the most frequent class"),
	187	new Tag(UNCOVACTION_REJECT, "Reject the decision and abstain")
	188	};
	189
	190
	191	/** Whether using product T-norm (or else min T-norm) */
	192	private int m_tNorm = TNORM_PROD;
	193
	194	/** The Product T-Norm flag. */
	195	private static final int TNORM_PROD = 0;
	196
	197	/** The Minimum T-Norm flag. */
	198	private static final int TNORM_MIN = 1;
	199
	200	/** The tags describing the T-norms */
	201	private static final Tag [] TAGS_TNORM = {
	202	new Tag(TNORM_PROD, "Product T-Norm (standard)"),
	203	new Tag(TNORM_MIN, "Minimum T-Norm")
	204	};
	205
	206
	207	/**
	208	* Returns a string describing classifier
	209	* @return a description suitable for
	210	* displaying in the explorer/experimenter gui
	211	*/
	212	public String globalInfo() {
	213
	214	return "FURIA: Fuzzy Unordered Rule Induction Algorithm\n\n"
	215	+ "Details please see:\n\n"
	216	+ getTechnicalInformation().toString() + "\n\n";
	217	}
	218
	219	/**
	220	* Returns an instance of a TechnicalInformation object, containing
	221	* detailed information about the technical background of this class,
	222	* e.g., paper reference or book this class is based on.
	223	*
	224	* @return the technical information about this class
	225	*/
	226	public TechnicalInformation getTechnicalInformation() {
	227	TechnicalInformation result;
	228
	229	result = new TechnicalInformation(Type.ARTICLE);
	230	result.setValue(Field.AUTHOR, "Jens Christian Huehn and Eyke Huellermeier");
	231	result.setValue(Field.TITLE, "FURIA: An Algorithm for Unordered Fuzzy Rule Induction");
	232	result.setValue(Field.YEAR, "2009");
	233	result.setValue(Field.JOURNAL, "Data Mining and Knowledge Discovery");
	234	return result;
	235	}
	236
	237	/**
	238	* Returns an enumeration describing the available options
	239	* Valid options are: <p>
	240	*
	241	* -F number <br>
	242	* The number of folds for reduced error pruning. One fold is
	243	* used as the pruning set. (Default: 3) <p>
	244	*
	245	* -N number <br>
	246	* The minimal weights of instances within a split.
	247	* (Default: 2) <p>
	248	*
	249	* -O number <br>
	250	* Set the number of runs of optimizations. (Default: 2)<p>
	251	*
	252	* -D <br>
	253	* Whether turn on the debug mode
	254	*
	255	* -S number <br>
	256	* The seed of randomization used in FURIA.(Default: 1)<p>
	257	*
	258	* -E <br>
	259	* Whether NOT check the error rate >= 0.5 in stopping criteria.
	260	* (default: check)<p>
	261	*
	262	* -s <br>
	263	* The action performed for uncovered instances.
	264	* (default: use rule stretching)<p>
	265	*
	266	* -p <br>
	267	* The T-Norm used as fuzzy AND-operator.
	268	* (default: Product T-Norm)<p>
	269	*
	270	* @return an enumeration of all the available options
	271	*/
	272	public Enumeration listOptions() {
	273	Vector newVector = new Vector(8);
	274	newVector.addElement(new Option("\tSet number of folds for REP\n" +
	275	"\tOne fold is used as pruning set.\n" +
	276	"\t(default 3)","F", 1, "-F <number of folds>"));
	277
	278	newVector.addElement(new Option("\tSet the minimal weights of instances\n" +
	279	"\twithin a split.\n" +
	280	"\t(default 2.0)","N", 1, "-N <min. weights>"));
	281
	282	newVector.addElement(new Option("\tSet the number of runs of\n"+
	283	"\toptimizations. (Default: 2)", "O",
	284	1,"-O <number of runs>"));
	285
	286	newVector.addElement(new Option("\tSet whether turn on the\n"+
	287	"\tdebug mode (Default: false)", "D",
	288	0,"-D"));
	289
	290	newVector.addElement(new Option("\tThe seed of randomization\n"+
	291	"\t(Default: 1)", "S",
	292	1,"-S <seed>"));
	293
	294	newVector.addElement(new Option("\tWhether NOT check the error rate>=0.5\n"
	295	+"\tin stopping criteria "
	296	+"\t(default: check)", "E",
	297	0, "-E"));
	298
	299	newVector.addElement(new Option("\tThe action performed for uncovered instances.\n"
	300	+"\t(default: use stretching)", "s",
	301	1, "-s"));
	302
	303	newVector.addElement(new Option("\tThe T-norm used as fuzzy AND-operator.\n"
	304	+"\t(default: Product T-norm)", "p",
	305	1, "-p"));
	306
	307	return newVector.elements();
	308	}
	309
	310	/**
	311	* Parses a given list of options. <p/>
	312	*
	313	<!-- options-start -->
	314	* Valid options are: <p/>
	315	*
	316	* <pre> -F <number of folds>
	317	* Set number of folds for REP
	318	* One fold is used as pruning set.
	319	* (default 3)</pre>
	320	*
	321	* <pre> -N <min. weights>
	322	* Set the minimal weights of instances
	323	* within a split.
	324	* (default 2.0)</pre>
	325	*
	326	* <pre> -O <number of runs>
	327	* Set the number of runs of
	328	* optimizations. (Default: 2)</pre>
	329	*
	330	* <pre> -D
	331	* Set whether turn on the
	332	* debug mode (Default: false)</pre>
	333	*
	334	* <pre> -S <seed>
	335	* The seed of randomization
	336	* (Default: 1)</pre>
	337	*
	338	* <pre> -E
	339	* Whether NOT check the error rate>=0.5
	340	* in stopping criteria (default: check)</pre>
	341	*
	342	* <pre> -s
	343	* The action performed for uncovered instances.
	344	* (default: use stretching)</pre>
	345	*
	346	* <pre> -p
	347	* The T-norm used as fuzzy AND-operator.
	348	* (default: Product T-norm)</pre>
	349	*
	350	<!-- options-end -->
	351	*
	352	* @param options the list of options as an array of strings
	353	* @throws Exception if an option is not supported
	354	*/
	355	public void setOptions(String[] options) throws Exception {
	356	String numFoldsString = Utils.getOption('F', options);
	357	if (numFoldsString.length() != 0)
	358	m_Folds = Integer.parseInt(numFoldsString);
	359	else
	360	m_Folds = 3;
	361
	362	String minNoString = Utils.getOption('N', options);
	363	if (minNoString.length() != 0)
	364	m_MinNo = Double.parseDouble(minNoString);
	365	else
	366	m_MinNo = 2.0;
	367
	368	String seedString = Utils.getOption('S', options);
	369	if (seedString.length() != 0)
	370	m_Seed = Long.parseLong(seedString);
	371	else
	372	m_Seed = 1;
	373
	374	String runString = Utils.getOption('O', options);
	375	if (runString.length() != 0)
	376	m_Optimizations = Integer.parseInt(runString);
	377	else
	378	m_Optimizations = 2;
	379
	380	String tNormString = Utils.getOption('p', options);
	381	if (tNormString.length() != 0)
	382	m_tNorm = Integer.parseInt(tNormString);
	383	else
	384	m_tNorm = TNORM_PROD;
	385
	386	String uncovActionString = Utils.getOption('s', options);
	387	if (uncovActionString.length() != 0)
	388	m_uncovAction = Integer.parseInt(uncovActionString);
	389	else
	390	m_uncovAction = UNCOVACTION_STRETCH;
	391
	392	m_Debug = Utils.getFlag('D', options);
	393
	394	m_CheckErr = !Utils.getFlag('E', options);
	395	}
	396
	397	/**
	398	* Gets the current settings of the Classifier.
	399	*
	400	* @return an array of strings suitable for passing to setOptions
	401	*/
	402	public String [] getOptions() {
	403
	404	String [] options = new String [14];
	405	int current = 0;
	406	options[current++] = "-F"; options[current++] = "" + m_Folds;
	407	options[current++] = "-N"; options[current++] = "" + m_MinNo;
	408	options[current++] = "-O"; options[current++] = "" + m_Optimizations;
	409	options[current++] = "-S"; options[current++] = "" + m_Seed;
	410	options[current++] = "-p"; options[current++] = "" + m_tNorm;
	411	options[current++] = "-s"; options[current++] = "" + m_uncovAction;
	412
	413	if(m_Debug)
	414	options[current++] = "-D";
	415
	416	if(!m_CheckErr)
	417	options[current++] = "-E";
	418
	419	while(current < options.length)
	420	options[current++] = "";
	421
	422	return options;
	423	}
	424
	425	/**
	426	* Returns an enumeration of the additional measure names
	427	* @return an enumeration of the measure names
	428	*/
	429	public Enumeration enumerateMeasures() {
	430	Vector newVector = new Vector(1);
	431	newVector.addElement("measureNumRules");
	432	return newVector.elements();
	433	}
	434
	435	/**
	436	* Returns the value of the named measure
	437	* @param additionalMeasureName the name of the measure to query for its value
	438	* @return the value of the named measure
	439	* @throws IllegalArgumentException if the named measure is not supported
	440	*/
	441	public double getMeasure(String additionalMeasureName) {
	442	if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0)
	443	return m_Ruleset.size();
	444	else
	445	throw new IllegalArgumentException(additionalMeasureName+" not supported (FURIA)");
	446	}
	447
	448	/**
	449	* Returns the tip text for this property
	450	* @return tip text for this property suitable for
	451	* displaying in the explorer/experimenter gui
	452	*/
	453	public String foldsTipText() {
	454	return "Determines the amount of data used for pruning. One fold is used for "
	455	+ "pruning, the rest for growing the rules.";
	456	}
	457
	458	/**
	459	* Sets the number of folds to use
	460	*
	461	* @param fold the number of folds
	462	*/
	463	public void setFolds(int fold) {
	464	m_Folds = fold;
	465	}
	466
	467	/**
	468	* Gets the number of folds
	469	*
	470	* @return the number of folds
	471	*/
	472	public int getFolds(){
	473	return m_Folds;
	474	}
	475
	476	/**
	477	* Returns the tip text for this property
	478	* @return tip text for this property suitable for
	479	* displaying in the explorer/experimenter gui
	480	*/
	481	public String minNoTipText() {
	482	return "The minimum total weight of the instances in a rule.";
	483	}
	484
	485	/**
	486	* Sets the minimum total weight of the instances in a rule
	487	*
	488	* @param m the minimum total weight of the instances in a rule
	489	*/
	490	public void setMinNo(double m) {
	491	m_MinNo = m;
	492	}
	493
	494	/**
	495	* Gets the minimum total weight of the instances in a rule
	496	*
	497	* @return the minimum total weight of the instances in a rule
	498	*/
	499	public double getMinNo(){
	500	return m_MinNo;
	501	}
	502
	503	/**
	504	* Returns the tip text for this property
	505	* @return tip text for this property suitable for
	506	* displaying in the explorer/experimenter gui
	507	*/
	508	public String seedTipText() {
	509	return "The seed used for randomizing the data.";
	510	}
	511
	512	/**
	513	* Sets the seed value to use in randomizing the data
	514	*
	515	* @param s the new seed value
	516	*/
	517	public void setSeed(long s) {
	518	m_Seed = s;
	519	}
	520
	521	/**
	522	* Gets the current seed value to use in randomizing the data
	523	*
	524	* @return the seed value
	525	*/
	526	public long getSeed(){
	527	return m_Seed;
	528	}
	529
	530	/**
	531	* Returns the tip text for this property
	532	* @return tip text for this property suitable for
	533	* displaying in the explorer/experimenter gui
	534	*/
	535	public String optimizationsTipText() {
	536	return "The number of optimization runs.";
	537	}
	538
	539	/**
	540	* Sets the number of optimization runs
	541	*
	542	* @param run the number of optimization runs
	543	*/
	544	public void setOptimizations(int run) {
	545	m_Optimizations = run;
	546	}
	547
	548	/**
	549	* Gets the the number of optimization runs
	550	*
	551	* @return the number of optimization runs
	552	*/
	553	public int getOptimizations() {
	554	return m_Optimizations;
	555	}
	556
	557	/**
	558	* Returns the tip text for this property
	559	* @return tip text for this property suitable for
	560	* displaying in the explorer/experimenter gui
	561	*/
	562	public String debugTipText() {
	563	return "Whether debug information is output to the console.";
	564	}
	565
	566	/**
	567	* Sets whether debug information is output to the console
	568	*
	569	* @param d whether debug information is output to the console
	570	*/
	571	public void setDebug(boolean d) {
	572	m_Debug = d;
	573	}
	574
	575	/**
	576	* Gets whether debug information is output to the console
	577	*
	578	* @return whether debug information is output to the console
	579	*/
	580	public boolean getDebug(){
	581	return m_Debug;
	582	}
	583
	584	/**
	585	* Returns the tip text for this property
	586	* @return tip text for this property suitable for
	587	* displaying in the explorer/experimenter gui
	588	*/
	589	public String checkErrorRateTipText() {
	590	return "Whether check for error rate >= 1/2 is included" +
	591	" in stopping criterion.";
	592	}
	593
	594	/**
	595	* Sets whether to check for error rate is in stopping criterion
	596	*
	597	* @param d whether to check for error rate is in stopping criterion
	598	*/
	599	public void setCheckErrorRate(boolean d) {
	600	m_CheckErr = d;
	601	}
	602
	603	/**
	604	* Gets whether to check for error rate is in stopping criterion
	605	*
	606	* @return true if checking for error rate is in stopping criterion
	607	*/
	608	public boolean getCheckErrorRate(){
	609	return m_CheckErr;
	610	}
	611
	612	/**
	613	* Returns the tip text for this property
	614	* @return tip text for this property suitable for
	615	* displaying in the explorer/experimenter gui
	616	*/
	617	public String uncovActionTipText() {
	618	return "Selet the action that is performed for uncovered instances.";
	619	}
	620
	621	/**
	622	* Gets the action that is performed for uncovered instances.
	623	* It can be UNCOVACTION_STRETCH, UNCOVACTION_APRIORI or
	624	* UNCOVACTION_REJECT.
	625	* @return the current TNorm.
	626	*/
	627	public SelectedTag getUncovAction() {
	628	return new SelectedTag(m_uncovAction, TAGS_UNCOVACTION);
	629	}
	630
	631	/**
	632	* Sets the action that is performed for uncovered instances.
	633	* It can be UNCOVACTION_STRETCH, UNCOVACTION_APRIORI or
	634	* UNCOVACTION_REJECT.
	635	* @param newUncovAction the new action.
	636	*/
	637	public void setUncovAction(SelectedTag newUncovAction) {
	638	if (newUncovAction.getTags() == TAGS_UNCOVACTION) {
	639	m_uncovAction = newUncovAction.getSelectedTag().getID();
	640	}
	641	}
	642
	643	/**
	644	* Returns the tip text for this property
	645	* @return tip text for this property suitable for
	646	* displaying in the explorer/experimenter gui
	647	*/
	648	public String TNormTipText() {
	649	return "Choose the T-Norm that is used as fuzzy AND-operator.";
	650	}
	651
	652	/**
	653	* Gets the TNorm used. Will be either TNORM_PROD or TNORM_MIN.
	654	*
	655	* @return the current TNorm.
	656	*/
	657	public SelectedTag getTNorm() {
	658	return new SelectedTag(m_tNorm, TAGS_TNORM);
	659	}
	660
	661	/**
	662	* Sets the TNorm used. Will be either TNORM_PROD or TNORM_MIN.
	663	*
	664	* @param newTNorm the new TNorm.
	665	*/
	666	public void setTNorm(SelectedTag newTNorm) {
	667	if (newTNorm.getTags() == TAGS_TNORM) {
	668	m_tNorm = newTNorm.getSelectedTag().getID();
	669	}
	670	}
	671
	672
	673	/**
	674	* Get the ruleset generated by FURIA
	675	*
	676	* @return the ruleset
	677	*/
	678	public FastVector getRuleset(){ return m_Ruleset; }
	679
	680	/**
	681	* Get the statistics of the ruleset in the given position
	682	*
	683	* @param pos the position of the stats, assuming correct
	684	* @return the statistics of the ruleset in the given position
	685	*/
	686	public RuleStats getRuleStats(int pos) {
	687	return (RuleStats)m_RulesetStats.elementAt(pos);
	688	}
	689
	690	/**
	691	* The single antecedent in the rule, which is composed of an attribute and
	692	* the corresponding value. There are two inherited classes, namely NumericAntd
	693	* and NominalAntd in which the attributes are numeric and nominal respectively.
	694	*/
	695	protected abstract class Antd
	696	implements WeightedInstancesHandler, Copyable, Serializable {
	697
	698	/** The attribute of the antecedent */
	699	public Attribute att;
	700
	701	/** The attribute value of the antecedent.
	702	For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */
	703	public double value;
	704
	705	/** The maximum infoGain achieved by this antecedent test
	706	* in the growing data */
	707	protected double maxInfoGain;
	708
	709	/** The accurate rate of this antecedent test on the growing data */
	710	protected double accuRate;
	711
	712	/** The coverage of this antecedent in the growing data */
	713	protected double cover;
	714
	715	/** The accurate data for this antecedent in the growing data */
	716	protected double accu;
	717
	718
	719	/** Confidence / weight of this rule for the rule stretching procedure that
	720	* is returned when this is the last antecedent of the rule. */
	721	double weightOfTheRuleWhenItIsPrunedAfterThisAntecedent = 0;
	722
	723	/** Confidence / weight of this antecedent. */
	724	public double m_confidence = 0.0;
	725
	726	/**
	727	* Constructor
	728	*/
	729	public Antd(Attribute a){
	730	att=a;
	731	value=Double.NaN;
	732	maxInfoGain = 0;
	733	accuRate = Double.NaN;
	734	cover = Double.NaN;
	735	accu = Double.NaN;
	736	}
	737
	738	/* The abstract members for inheritance */
	739	public abstract Instances[] splitData(Instances data, double defAcRt,
	740	double cla);
	741	public abstract double covers(Instance inst);
	742	public abstract String toString();
	743
	744	/**
	745	* Implements Copyable
	746	*
	747	* @return a copy of this object
	748	*/
	749	public abstract Object copy();
	750
	751	/* Get functions of this antecedent */
	752	public Attribute getAttr(){ return att; }
	753	public double getAttrValue(){ return value; }
	754	public double getMaxInfoGain(){ return maxInfoGain; }
	755	public double getAccuRate(){ return accuRate; }
	756	public double getAccu(){ return accu; }
	757	public double getCover(){ return cover; }
	758	}
	759
	760	/**
	761	* The antecedent with numeric attribute
	762	*/
	763	public class
	764	NumericAntd extends Antd {
	765
	766	/** for serialization */
	767	static final long serialVersionUID = 5699457269983735442L;
	768
	769	/** The split point for this numeric antecedent */
	770	public double splitPoint;
	771
	772	/** The edge point for the fuzzy set of this numeric antecedent */
	773	public double supportBound;
	774
	775	/** A flag determining whether this antecedent was successfully fuzzified yet*/
	776	public boolean fuzzyYet = false;
	777
	778
	779	/**
	780	* Constructor
	781	*/
	782	public NumericAntd(Attribute a){
	783	super(a);
	784	splitPoint = Double.NaN;
	785	supportBound = Double.NaN;
	786	}
	787
	788	/**
	789	* Get split point of this numeric antecedent
	790	*
	791	* @return the split point of this numeric antecedent
	792	*/
	793	public double getSplitPoint(){
	794	return splitPoint;
	795	}
	796
	797	/**
	798	* Implements Copyable
	799	*
	800	* @return a copy of this object
	801	*/
	802	public Object copy(){
	803	NumericAntd na = new NumericAntd(getAttr());
	804	na.m_confidence = m_confidence;
	805	na.value = this.value;
	806	na.splitPoint = this.splitPoint;
	807	na.supportBound = this.supportBound;
	808	na.fuzzyYet = this.fuzzyYet;
	809	return na;
	810	}
	811
	812
	813
	814	/**
	815	* Implements the splitData function.
	816	* This procedure is to split the data into two bags according
	817	* to the information gain of the numeric attribute value
	818	* The maximum infoGain is also calculated.
	819	*
	820	* @param insts the data to be split
	821	* @param defAcRt the default accuracy rate for data
	822	* @param cl the class label to be predicted
	823	* @return the array of data after split
	824	*/
	825	public Instances[] splitData(Instances insts, double defAcRt,
	826	double cl){
	827	Instances data = insts;
	828	int total=data.numInstances();// Total number of instances without
	829	// missing value for att
	830
	831	int split=1; // Current split position
	832	int prev=0; // Previous split position
	833	int finalSplit=split; // Final split position
	834	maxInfoGain = 0;
	835	value = 0;
	836
	837	double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
	838
	839	data.sort(att);
	840	// Find the las instance without missing value
	841	for(int x=0; x<data.numInstances(); x++){
	842	Instance inst = data.instance(x);
	843	if(inst.isMissing(att)){
	844	total = x;
	845	break;
	846	}
	847
	848	sndCover += inst.weight();
	849	if(Utils.eq(inst.classValue(), cl))
	850	sndAccu += inst.weight();
	851	}
	852
	853	if(total == 0) return null; // Data all missing for the attribute
	854	splitPoint = data.instance(total-1).value(att);
	855
	856	for(; split <= total; split++){
	857	if((split == total) \|\|
	858	(data.instance(split).value(att) > // Can't split within
	859	data.instance(prev).value(att))){ // same value
	860
	861	for(int y=prev; y<split; y++){
	862	Instance inst = data.instance(y);
	863	fstCover += inst.weight();
	864	if(Utils.eq(data.instance(y).classValue(), cl)){
	865	fstAccu += inst.weight(); // First bag positive# ++
	866	}
	867	}
	868
	869	double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
	870	sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);
	871
	872	/* Which bag has higher information gain? */
	873	boolean isFirst;
	874	double fstInfoGain, sndInfoGain;
	875	double accRate, infoGain, coverage, accurate;
	876
	877	fstInfoGain =
	878	//Utils.eq(defAcRt, 1.0) ?
	879	//fstAccu/(double)numConds :
	880	fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));
	881
	882	sndInfoGain =
	883	//Utils.eq(defAcRt, 1.0) ?
	884	//sndAccu/(double)numConds :
	885	sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));
	886
	887	if(fstInfoGain > sndInfoGain){
	888	isFirst = true;
	889	infoGain = fstInfoGain;
	890	accRate = fstAccuRate;
	891	accurate = fstAccu;
	892	coverage = fstCover;
	893	}
	894	else{
	895	isFirst = false;
	896	infoGain = sndInfoGain;
	897	accRate = sndAccuRate;
	898	accurate = sndAccu;
	899	coverage = sndCover;
	900	}
	901
	902	/* Check whether so far the max infoGain */
	903	if(infoGain > maxInfoGain){
	904	splitPoint = data.instance(prev).value(att);
	905	value = (isFirst) ? 0 : 1;
	906	accuRate = accRate;
	907	accu = accurate;
	908	cover = coverage;
	909	maxInfoGain = infoGain;
	910	finalSplit = (isFirst) ? split : prev;
	911	}
	912
	913	for(int y=prev; y<split; y++){
	914	Instance inst = data.instance(y);
	915	sndCover -= inst.weight();
	916	if(Utils.eq(data.instance(y).classValue(), cl)){
	917	sndAccu -= inst.weight(); // Second bag positive# --
	918	}
	919	}
	920	prev=split;
	921	}
	922	}
	923
	924	/* Split the data */
	925	Instances[] splitData = new Instances[2];
	926	splitData[0] = new Instances(data, 0, finalSplit);
	927	splitData[1] = new Instances(data, finalSplit, total-finalSplit);
	928
	929	return splitData;
	930	}
	931
	932
	933	/**
	934	* The degree of coverage for the instance given that antecedent
	935	*
	936	* @param inst the instance in question
	937	* @return the numeric value indicating the membership of the instance
	938	* for this antecedent
	939	*/
	940	public double covers(Instance inst){
	941	double isCover=0;
	942	if(!inst.isMissing(att)){
	943	if((int)value == 0){ // First bag
	944	if(inst.value(att) <= splitPoint)
	945	isCover=1;
	946	else if(fuzzyYet && (inst.value(att) > splitPoint) && (inst.value(att) < supportBound ))
	947	isCover= 1-((inst.value(att) - splitPoint)/(supportBound-splitPoint));
	948	}else{
	949	if(inst.value(att) >= splitPoint) // Second bag
	950	isCover=1;
	951	else if(fuzzyYet && inst.value(att) < splitPoint && (inst.value(att) > supportBound ))
	952	isCover= 1-((splitPoint - inst.value(att)) /(splitPoint-supportBound));
	953	}
	954	}
	955
	956	return isCover;
	957	}
	958
	959	/**
	960	* Prints this antecedent
	961	*
	962	* @return a textual description of this antecedent
	963	*/
	964	public String toString() {
	965	if (value == 0){
	966	if (fuzzyYet){
	967	return (att.name() + " in [-inf, -inf, " + Utils.doubleToString(splitPoint, 6) + ", " + Utils.doubleToString(supportBound, 6) + "]");
	968	}
	969	return (att.name() + " in [-inf, " + Utils.doubleToString(splitPoint, 6) + "]");
	970	}else{
	971	if (fuzzyYet){
	972	return (att.name() + " in [" + Utils.doubleToString(supportBound, 6) + ", " + Utils.doubleToString(splitPoint, 6) + ", inf, inf]");
	973	}
	974	return (att.name() + " in [" + Utils.doubleToString(splitPoint, 6) + ", inf]");
	975	}
	976
	977	}
	978
	979	}
	980
	981
	982	/**
	983	* The antecedent with nominal attribute
	984	*/
	985	protected class NominalAntd
	986	extends Antd{
	987
	988	/** for serialization */
	989	static final long serialVersionUID = -9102297038837585135L;
	990
	991	/* The parameters of infoGain calculated for each attribute value
	992	* in the growing data */
	993	private double[] accurate;
	994	private double[] coverage;
	995
	996	/**
	997	* Constructor
	998	*/
	999	public NominalAntd(Attribute a){
	1000	super(a);
	1001	int bag = att.numValues();
	1002	accurate = new double[bag];
	1003	coverage = new double[bag];
	1004	}
	1005
	1006	/**
	1007	* Implements Copyable
	1008	*
	1009	* @return a copy of this object
	1010	*/
	1011	public Object copy(){
	1012	Antd antec = new NominalAntd(getAttr());
	1013	antec.m_confidence = m_confidence;
	1014	antec.value = this.value;
	1015	return antec;
	1016	}
	1017
	1018	/**
	1019	* Implements the splitData function.
	1020	* This procedure is to split the data into bags according
	1021	* to the nominal attribute value
	1022	* The infoGain for each bag is also calculated.
	1023	*
	1024	* @param data the data to be split
	1025	* @param defAcRt the default accuracy rate for data
	1026	* @param cl the class label to be predicted
	1027	* @return the array of data after split
	1028	*/
	1029	public Instances[] splitData(Instances data, double defAcRt,
	1030	double cl){
	1031	int bag = att.numValues();
	1032	Instances[] splitData = new Instances[bag];
	1033
	1034	for(int x=0; x<bag; x++){
	1035	splitData[x] = new Instances(data, data.numInstances());
	1036	accurate[x] = 0;
	1037	coverage[x] = 0;
	1038	}
	1039
	1040	for(int x=0; x<data.numInstances(); x++){
	1041	Instance inst=data.instance(x);
	1042	if(!inst.isMissing(att)){
	1043	int v = (int)inst.value(att);
	1044	splitData[v].add(inst);
	1045	coverage[v] += inst.weight();
	1046	if((int)inst.classValue() == (int)cl)
	1047	accurate[v] += inst.weight();
	1048	}
	1049	}
	1050
	1051	for(int x=0; x<bag; x++){
	1052	double t = coverage[x]+1.0;
	1053	double p = accurate[x] + 1.0;
	1054	double infoGain =
	1055	//Utils.eq(defAcRt, 1.0) ?
	1056	//accurate[x]/(double)numConds :
	1057	accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));
	1058
	1059	if(infoGain > maxInfoGain){
	1060	maxInfoGain = infoGain;
	1061	cover = coverage[x];
	1062	accu = accurate[x];
	1063	accuRate = p/t;
	1064	value = (double)x;
	1065	}
	1066	}
	1067
	1068	return splitData;
	1069	}
	1070
	1071	/**
	1072	* Whether the instance is covered by this antecedent
	1073	*
	1074	* @param inst the instance in question
	1075	* @return 1 if the instance has a non-missing value for the attribute
	1076	* that equals the value of this antecedent, 0 otherwise
	1077	*/
	1078	public double covers(Instance inst){
	1079	double isCover=0;
	1080	if(!inst.isMissing(att)){
	1081	if((int)inst.value(att) == (int)value)
	1082	isCover=1;
	1083	}
	1084	return isCover;
	1085	}
	1086
	1087	/**
	1088	* Prints this antecedent
	1089	*
	1090	* @return a textual description of this antecedent
	1091	*/
	1092	public String toString() {
	1093	return (att.name() + " = " +att.value((int)value));
	1094	}
	1095	}
	1096
	1097
	1098	/**
	1099	* This class implements a single rule that predicts specified class.
	1100	*
	1101	* A rule consists of antecedents "AND"ed together and the consequent
	1102	* (class value) for the classification.
	1103	* In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
	1104	* select an antecedent and Reduced Error Pruning (REP) with the metric
	1105	* of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.
	1106	*/
	1107	public class RipperRule
	1108	extends Rule{
	1109
	1110	/** for serialization */
	1111	static final long serialVersionUID = -2410020717305262952L;
	1112
	1113	/** The internal representation of the class label to be predicted */
	1114	double m_Consequent = -1;
	1115
	1116	/** The vector of antecedents of this rule*/
	1117	public FastVector m_Antds = null;
	1118
	1119	/** Constructor */
	public RipperRule(){
		// A new rule starts without antecedents, i.e. as a default rule
		this.m_Antds = new FastVector();
	}
	1123
	1124	/**
	1125	* Sets the internal representation of the class label to be predicted
	1126	*
	1127	* @param cl the internal representation of the class label to be predicted
	1128	*/
	1129	public void setConsequent(double cl) {
	1130	m_Consequent = cl;
	1131	}
	1132
	1133	/**
	1134	* Gets the internal representation of the class label to be predicted
	1135	*
	1136	* @return the internal representation of the class label to be predicted
	1137	*/
	1138	public double getConsequent() {
	1139	return m_Consequent;
	1140	}
	1141
	1142	/**
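	1143	* Get a copy of this rule; the antecedent vector is duplicated with copyElements(), so the copy does not share its antecedents with this rule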
	1144	*
	1145	* @return the copy
	1146	*/
	1147	public Object copy(){
	1148	RipperRule copy = new RipperRule();
	1149	copy.setConsequent(getConsequent());
	1150	copy.m_Antds = (FastVector)this.m_Antds.copyElements();
	1151	return copy;
	1152	}
	1153
	1154
	1155
	1156	/**
	1157	* The degree to which the given instance is covered by this rule
	1158	*
	1159	* @param datum the instance in question
	1160	* @return the degree to which the instance
	1161	* is covered by this rule
	1162	*/
	1163	public double coverageDegree(Instance datum){
	1164	double coverage = 1;
	1165
	1166	for(int i=0; i<m_Antds.size(); i++){
	1167	Antd antd = (Antd)m_Antds.elementAt(i);
	1168	if(m_tNorm == TNORM_PROD){
	1169	// Product T-Norm
	1170	if (antd instanceof NumericAntd)
	1171	coverage *= ((NumericAntd)antd).covers(datum);
	1172	else
	1173	coverage *= antd.covers(datum);
	1174	}else{
	1175	// Min T-Norm
	1176	if (antd instanceof NumericAntd)
	1177	coverage = Math.min(coverage, ((NumericAntd)antd).covers(datum));
	1178	else
	1179	coverage = Math.min(coverage, antd.covers(datum));
	1180	}
	1181
	1182	}
	1183
	1184	return coverage;
	1185	}
	1186
	1187	/**
	1188	* Whether the instance covered by this rule
	1189	*
	1190	* @param datum the instance in question
	1191	* @return the boolean value indicating whether the instance
	1192	* is covered by this rule
	1193	*/
	1194	public boolean covers(Instance datum){
	1195	if (coverageDegree(datum) == 0){
	1196	return false;
	1197	}else{
	1198	return true;
	1199	}
	1200	}
	1201
	1202	/**
	1203	* Whether this rule has antecedents, i.e. whether it is a default rule
	1204	*
	1205	* @return the boolean value indicating whether the rule has antecedents
	1206	*/
	1207	public boolean hasAntds(){
	1208	if (m_Antds == null)
	1209	return false;
	1210	else
	1211	return (m_Antds.size() > 0);
	1212	}
	1213
	1214	/**
	1215	* the number of antecedents of the rule
	1216	*
	1217	* @return the size of this rule
	1218	*/
	1219	public double size(){ return (double)m_Antds.size(); }
	1220
	1221
	1222	/**
	1223	* Private function to compute default number of accurate instances
	1224	* in the specified data for the consequent of the rule
	1225	*
	1226	* @param data the data in question
	1227	* @return the default accuracy number
	1228	*/
	1229	private double computeDefAccu(Instances data){
	1230	double defAccu=0;
	1231	for(int i=0; i<data.numInstances(); i++){
	1232	Instance inst = data.instance(i);
	1233	if((int)inst.classValue() == (int)m_Consequent)
	1234	defAccu += inst.weight();
	1235	}
	1236	return defAccu;
	1237	}
	1238
	1239
	1240	/**
	1241	* Build one rule using the growing data
	1242	*
	1243	* @param data the growing data used to build the rule
	1244	* @throws Exception if the consequent is not set yet
	1245	*/
	1246	public void grow(Instances data) throws Exception {
	1247	if(m_Consequent == -1)
	1248	throw new Exception(" Consequent not set yet.");
	1249
	1250	Instances growData = data;
	1251	double sumOfWeights = growData.sumOfWeights();
	1252	if(!Utils.gr(sumOfWeights, 0.0))
	1253	return;
	1254
	1255	/* Compute the default accurate rate of the growing data */
	1256	double defAccu = computeDefAccu(growData);
	1257	double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0);
	1258
	1259	/* Keep the record of which attributes have already been used*/
	1260	boolean[] used=new boolean [growData.numAttributes()];
	1261	for (int k=0; k<used.length; k++)
	1262	used[k]=false;
	1263	int numUnused=used.length;
	1264
	1265	// If there are already antecedents existing
	1266	for(int j=0; j < m_Antds.size(); j++){
	1267	Antd antdj = (Antd)m_Antds.elementAt(j);
	1268	if(!antdj.getAttr().isNumeric()){
	1269	used[antdj.getAttr().index()]=true;
	1270	numUnused--;
	1271	}
	1272	}
	1273
	1274	double maxInfoGain;
	1275	while (Utils.gr(growData.numInstances(), 0.0) &&
	1276	(numUnused > 0)
	1277	&& Utils.sm(defAcRt, 1.0)
	1278	){
	1279
	1280	// We require that infoGain be positive
	1281	/*if(numAntds == originalSize)
	1282	maxInfoGain = 0.0; // At least one condition allowed
	1283	else
	1284	maxInfoGain = Utils.eq(defAcRt, 1.0) ?
	1285	defAccu/(double)numAntds : 0.0; */
	1286	maxInfoGain = 0.0;
	1287
	1288	/* Build a list of antecedents */
	1289	Antd oneAntd=null;
	1290	Instances coverData = null;
	1291	Enumeration enumAttr=growData.enumerateAttributes();
	1292
	1293	/* Build one condition based on all attributes not used yet*/
	1294	while (enumAttr.hasMoreElements()){
	1295	Attribute att= (Attribute)(enumAttr.nextElement());
	1296
	1297	if(m_Debug)
	1298	System.err.println("\nOne condition: size = "
	1299	+ growData.sumOfWeights());
	1300
	1301	Antd antd =null;
	1302	if(att.isNumeric())
	1303	antd = new NumericAntd(att);
	1304	else
	1305	antd = new NominalAntd(att);
	1306
	1307	if(!used[att.index()]){
	1308	/* Compute the best information gain for each attribute,
	1309	it's stored in the antecedent formed by this attribute.
	1310	This procedure returns the data covered by the antecedent*/
	1311	Instances coveredData = computeInfoGain(growData, defAcRt,
	1312	antd);
	1313	if(coveredData != null){
	1314	double infoGain = antd.getMaxInfoGain();
	1315	if(m_Debug)
	1316	System.err.println("Test of \'"+antd.toString()+
	1317	"\': infoGain = "+
	1318	infoGain + " \| Accuracy = " +
	1319	antd.getAccuRate()+
	1320	"="+antd.getAccu()
	1321	+"/"+antd.getCover()+
	1322	" def. accuracy: "+defAcRt);
	1323
	1324	if(infoGain > maxInfoGain){
	1325	oneAntd=antd;
	1326	coverData = coveredData;
	1327	maxInfoGain = infoGain;
	1328	}
	1329	}
	1330	}
	1331	}
	1332
	1333	if(oneAntd == null) break; // Cannot find antds
	1334	if(Utils.sm(oneAntd.getAccu(), m_MinNo)) break;// Too low coverage
	1335
	1336	//Numeric attributes can be used more than once
	1337	if(!oneAntd.getAttr().isNumeric()){
	1338	used[oneAntd.getAttr().index()]=true;
	1339	numUnused--;
	1340	}
	1341
	1342	m_Antds.addElement(oneAntd);
	1343
	1344
	1345	growData = coverData;// Grow data size is shrinking
	1346	defAcRt = oneAntd.getAccuRate();
	1347	}
	1348	}
	1349
	1350
	1351	/**
	1352	* Compute the best information gain for the specified antecedent
	1353	*
	1354	* @param instances the data based on which the infoGain is computed
	1355	* @param defAcRt the default accuracy rate of data
	1356	* @param antd the specific antecedent
	1357	* @return the data covered by the antecedent
	1358	*/
	1359	private Instances computeInfoGain(Instances instances, double defAcRt,
	1360	Antd antd){
	1361	Instances data = instances;
	1362
	1363	/* Split the data into bags.
	1364	The information gain of each bag is also calculated in this procedure */
	1365	Instances[] splitData = antd.splitData(data, defAcRt,
	1366	m_Consequent);
	1367
	1368	/* Get the bag of data to be used for next antecedents */
	1369	if(splitData != null)
	1370	return splitData[(int)antd.getAttrValue()];
	1371	else return null;
	1372	}
	1373
	1374	/**
	1375	* Prune all the possible final sequences of the rule using the
	1376	* pruning data. The measure used to prune the rule is based on
	1377	* flag given.
	1378	*
	1379	* @param pruneData the pruning data used to prune the rule
	1380	* @param useWhole flag to indicate whether use the error rate of
	1381	* the whole pruning data instead of the data covered
	1382	*/
	1383	public void prune(Instances pruneData, boolean useWhole){
	1384	Instances data = pruneData;
	1385
	1386	double total = data.sumOfWeights();
	1387	if(!Utils.gr(total, 0.0))
	1388	return;
	1389
	1390	/* The default accurate # and rate on pruning data */
	1391	double defAccu=computeDefAccu(data);
	1392
	1393	if(m_Debug)
	1394	System.err.println("Pruning with " + defAccu +
	1395	" positive data out of " + total +
	1396	" instances");
	1397
	1398	int size=m_Antds.size();
	1399	if(size == 0) return; // Default rule before pruning
	1400
	1401	double[] worthRt = new double[size];
	1402	double[] coverage = new double[size];
	1403	double[] worthValue = new double[size];
	1404	for(int w=0; w<size; w++){
	1405	worthRt[w]=coverage[w]=worthValue[w]=0.0;
	1406	}
	1407
	1408	/* Calculate accuracy parameters for all the antecedents in this rule */
	1409	double tn = 0.0; // True negative if useWhole
	1410	for(int x=0; x<size; x++){
	1411	Antd antd=(Antd)m_Antds.elementAt(x);
	1412	Instances newData = data;
	1413	data = new Instances(newData, 0); // Make data empty
	1414
	1415	for(int y=0; y<newData.numInstances(); y++){
	1416	Instance ins=newData.instance(y);
	1417
	1418	if(antd.covers(ins)>0){ // Covered by this antecedent
	1419	coverage[x] += ins.weight();
	1420	data.add(ins); // Add to data for further pruning
	1421	if((int)ins.classValue() == (int)m_Consequent) // Accurate prediction
	1422	worthValue[x] += ins.weight();
	1423	}
	1424	else if(useWhole){ // Not covered
	1425	if((int)ins.classValue() != (int)m_Consequent)
	1426	tn += ins.weight();
	1427	}
	1428	}
	1429
	1430	if(useWhole){
	1431	worthValue[x] += tn;
	1432	worthRt[x] = worthValue[x] / total;
	1433	}
	1434	else // Note if coverage is 0, accuracy is 0.5
	1435	worthRt[x] = (worthValue[x]+1.0)/(coverage[x]+2.0);
	1436	}
	1437
	1438	double maxValue = (defAccu+1.0)/(total+2.0);
	1439	int maxIndex = -1;
	1440	for(int i=0; i<worthValue.length; i++){
	1441	if(m_Debug){
	1442	double denom = useWhole ? total : coverage[i];
	1443	System.err.println(i+"(useAccuray? "+!useWhole+"): "
	1444	+ worthRt[i] +
	1445	"="+worthValue[i]+
	1446	"/"+denom);
	1447	}
	1448	if(worthRt[i] > maxValue){ // Prefer to the
	1449	maxValue = worthRt[i]; // shorter rule
	1450	maxIndex = i;
	1451	}
	1452	}
	1453
	1454	if (maxIndex==-1) return;
	1455
	1456	/* Prune the antecedents according to the accuracy parameters */
	1457	for(int z=size-1;z>maxIndex;z--)
	1458	m_Antds.removeElementAt(z);
	1459	}
	1460
	1461	/**
	1462	* Prints this rule
	1463	*
	1464	* @param classAttr the class attribute in the data
	1465	* @return a textual description of this rule
	1466	*/
	1467	public String toString(Attribute classAttr) {
	1468	StringBuffer text = new StringBuffer();
	1469	if(m_Antds.size() > 0){
	1470	for(int j=0; j< (m_Antds.size()-1); j++)
	1471	text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
	1472	text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
	1473	}
	1474	text.append(" => " + classAttr.name() +
	1475	"=" + classAttr.value((int)m_Consequent));
	1476
	1477	return text.toString();
	1478	}
	1479
	1480	/**
	1481	* The fuzzification procedure
	1482	* @param data training data
	1483	* @param allWeightsAreOne flag whether all instances have weight 1. If this is the case branch-and-bound is possible for speed-up.
	1484	*/
	1485	public void fuzzify(Instances data, boolean allWeightsAreOne){
	1486	// Determine whether there are numeric antecedents that can be fuzzified.
	1487	if (m_Antds == null) return;
	1488	int numNumericAntds = 0;
	1489	for (int i = 0; i < m_Antds.size(); i++){
	1490	if (m_Antds.elementAt(i) instanceof NumericAntd)
	1491	numNumericAntds++;
	1492	}
	1493	if (numNumericAntds == 0)
	1494	return;
	1495
	1496	double maxPurity = Double.NEGATIVE_INFINITY;
	1497	boolean[] finishedAntecedents = new boolean[m_Antds.size()];
	1498	int numFinishedAntecedents = 0;
	1499
	1500	// Loop until all antecdents have been fuzzified
	1501	while (numFinishedAntecedents<m_Antds.size()){
	1502	double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY;
	1503	int bestAntecedentsIndex = -1;
	1504	double bestSupportBoundForAllAntecedents = Double.NaN;
	1505
	1506	Instances relevantData = new Instances(data,0);
	1507	for (int j = 0; j < m_Antds.size(); j++){
	1508	if(finishedAntecedents[j]) continue;
	1509
	1510	relevantData = new Instances (data);
	1511	/*
	1512	* Remove instances which are not relevant, because they are not covered
	1513	* by the _other_ antecedents.
	1514	*/
	1515	for (int k = 0; k < m_Antds.size(); k++){
	1516	if (k==j) continue;
	1517	Antd exclusionAntd = ((Antd)m_Antds.elementAt(k));
	1518	for (int y = 0; y < relevantData.numInstances(); y++){
	1519	if (exclusionAntd.covers(relevantData.instance(y)) == 0){
	1520	relevantData.delete(y--);
	1521	}
	1522	}
	1523	}
	1524
	1525	// test whether this antecedent is numeric and whether there is data for making it fuzzy
	1526	if (relevantData.attribute(((Antd)m_Antds.elementAt(j)).att.index()).isNumeric() && relevantData.numInstances()>0){
	1527	// Get a working copy of this antecedent
	1528	NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy();
	1529	currentAntd.fuzzyYet=true;
	1530
	1531	relevantData.deleteWithMissing(currentAntd.att.index());
	1532
	1533	double sumOfWeights = relevantData.sumOfWeights();
	1534	if(!Utils.gr(sumOfWeights, 0.0))
	1535	return;
	1536
	1537	relevantData.sort(currentAntd.att.index());
	1538
	1539	double maxPurityForThisAntecedent = 0;
	1540	double bestFoundSupportBound = Double.NaN;
	1541
	1542	double lastAccu = 0;
	1543	double lastCover = 0;
	1544	// Test all possible edge points
	1545	if (currentAntd.value == 0){
	1546	for (int k = 1; k < relevantData.numInstances(); k++){
	1547	// break the loop if there is no gain (only works when all instances have weight 1)
	1548	if ((lastAccu+(relevantData.numInstances()-k-1))/(lastCover+(relevantData.numInstances()-k-1)) < maxPurityForThisAntecedent && allWeightsAreOne){
	1549	break;
	1550	}
	1551
	1552	// Bag 1
	1553	if (currentAntd.splitPoint < relevantData.instance(k).value(currentAntd.att.index())
	1554	&& relevantData.instance(k).value(currentAntd.att.index()) != relevantData.instance(k-1).value(currentAntd.att.index())){
	1555	currentAntd.supportBound = relevantData.instance(k).value(currentAntd.att.index());
	1556
	1557	// Calculate the purity of this fuzzification
	1558	double[] accuArray = new double[relevantData.numInstances()];
	1559	double[] coverArray = new double[relevantData.numInstances()];
	1560	for (int i = 0; i < relevantData.numInstances(); i++){
	1561	coverArray[i] = relevantData.instance(i).weight();
	1562	double coverValue = currentAntd.covers(relevantData.instance(i));
	1563	if (coverArray[i] >= coverValue*relevantData.instance(i).weight()){
	1564	coverArray[i] = coverValue*relevantData.instance(i).weight();
	1565	if (relevantData.instance(i).classValue() == m_Consequent){
	1566	accuArray[i] = coverValue*relevantData.instance(i).weight();
	1567	}
	1568	}
	1569	}
	1570
	1571	// Test whether this fuzzification is the best one for this antecedent.
	1572	// Keep it if this is the case.
	1573	double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
	1574	if (purity >= maxPurityForThisAntecedent){
	1575	maxPurityForThisAntecedent =purity;
	1576	bestFoundSupportBound = currentAntd.supportBound;
	1577	}
	1578	lastAccu = Utils.sum(accuArray);
	1579	lastCover = Utils.sum(coverArray);
	1580	}
	1581	}
	1582	}else{
	1583	for (int k = relevantData.numInstances()-2; k >=0; k--){
	1584	// break the loop if there is no gain (only works when all instances have weight 1)
	1585	if ((lastAccu+(k))/(lastCover+(k)) < maxPurityForThisAntecedent && allWeightsAreOne){
	1586	break;
	1587	}
	1588
	1589	//Bag 2
	1590	if (currentAntd.splitPoint > relevantData.instance(k).value(currentAntd.att.index())
	1591	&& relevantData.instance(k).value(currentAntd.att.index()) != relevantData.instance(k+1).value(currentAntd.att.index())){
	1592	currentAntd.supportBound = relevantData.instance(k).value(currentAntd.att.index());
	1593
	1594	// Calculate the purity of this fuzzification
	1595	double[] accuArray = new double[relevantData.numInstances()];
	1596	double[] coverArray = new double[relevantData.numInstances()];
	1597	for (int i = 0; i < relevantData.numInstances(); i++){
	1598	coverArray[i] = relevantData.instance(i).weight();
	1599	double coverValue = currentAntd.covers(relevantData.instance(i));
	1600	if (coverArray[i] >= coverValue*relevantData.instance(i).weight()){
	1601	coverArray[i] = coverValue*relevantData.instance(i).weight();
	1602	if (relevantData.instance(i).classValue() == m_Consequent){
	1603	accuArray[i] = coverValue*relevantData.instance(i).weight();
	1604	}
	1605	}
	1606	}
	1607
	1608	// Test whether this fuzzification is the best one for this antecedent.
	1609	// Keep it if this is the case.
	1610	double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
	1611	if (purity >= maxPurityForThisAntecedent){
	1612	maxPurityForThisAntecedent =purity;
	1613	bestFoundSupportBound = currentAntd.supportBound;
	1614	}
	1615	lastAccu = Utils.sum(accuArray);
	1616	lastCover = Utils.sum(coverArray);
	1617	}
	1618	}
	1619
	1620	}
	1621
	1622	// Test whether the best fuzzification for this antecedent is the best one of all
	1623	// antecedents considered so far.
	1624	// Keep it if this is the case.
	1625	if (maxPurityForThisAntecedent>maxPurityOfAllAntecedents){
	1626	bestAntecedentsIndex = j;
	1627	bestSupportBoundForAllAntecedents = bestFoundSupportBound;
	1628	maxPurityOfAllAntecedents = maxPurityForThisAntecedent;
	1629	}
	1630	}else{
	1631	// Deal with a nominal antecedent.
	1632	// Since there is no fuzzification it is already finished.
	1633	finishedAntecedents[j] = true;
	1634	numFinishedAntecedents++;
	1635	continue;
	1636	}
	1637	}
	1638
	1639	// Make the fuzzification step for the current antecedent real.
	1640	if (maxPurity <= maxPurityOfAllAntecedents){
	1641	if (Double.isNaN(bestSupportBoundForAllAntecedents)){
	1642	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).supportBound = ((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).splitPoint;
	1643	}else{
	1644	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).supportBound = bestSupportBoundForAllAntecedents;
	1645	((NumericAntd)m_Antds.elementAt(bestAntecedentsIndex)).fuzzyYet = true;
	1646	}
	1647	maxPurity = maxPurityOfAllAntecedents;
	1648	}
	1649	finishedAntecedents[bestAntecedentsIndex] = true;
	1650	numFinishedAntecedents++;
	1651	}
	1652
	1653	}
	1654
	1655	/**
	1656	* Calculation of the rule weights / confidences for all beginning rule stumps.
	1657	* @param data The training data
	1658	*/
	1659	public void calculateConfidences(Instances data) {
	1660	RipperRule tempRule = (RipperRule) this.copy();
	1661
	1662	while(tempRule.hasAntds()){
	1663	double acc = 0;
	1664	double cov = 0;
	1665	for (int i = 0; i < data.numInstances(); i++){
	1666	double membershipValue = tempRule.coverageDegree(data.instance(i)) * data.instance(i).weight();
	1667	cov += membershipValue;
	1668	if (m_Consequent == data.instance(i).classValue()){
	1669	acc += membershipValue;
	1670	}
	1671	}
	1672
	1673	// m-estimate
	1674	double m = 2.0;
	1675	((Antd)this.m_Antds.elementAt((int)tempRule.size()-1)).m_confidence =
	1676	(acc+m*(aprioriDistribution[(int)m_Consequent]/
	1677	Utils.sum(aprioriDistribution))) / (cov+m);
	1678	tempRule.m_Antds.removeElementAt(tempRule.m_Antds.size()-1);
	1679	}
	1680	}
	1681
	1682
	1683	/**
	1684	* Get the rule confidence.
	1685	* @return rule confidence / weight
	1686	*/
	1687	public double getConfidence(){
	1688	if (!hasAntds())
	1689	return Double.NaN;
	1690	return ((Antd)m_Antds.lastElement()).m_confidence;
	1691	}
	1692
	1693	/**
	1694	*
	1695	*/
	1696	public String getRevision() {
	1697	return "1.0";
	1698	}
	1699
	1700	}
	1701
	1702	/**
	1703	* Returns default capabilities of the classifier.
	1704	*
	1705	* @return the capabilities of this classifier
	1706	*/
	1707	public Capabilities getCapabilities() {
	1708	Capabilities result = super.getCapabilities();
	1709	result.disableAll();
	1710
	1711	// attributes
	1712	result.enable(Capability.NOMINAL_ATTRIBUTES);
	1713	result.enable(Capability.NUMERIC_ATTRIBUTES);
	1714	result.enable(Capability.DATE_ATTRIBUTES);
	1715	result.enable(Capability.MISSING_VALUES);
	1716
	1717	// class
	1718	result.enable(Capability.NOMINAL_CLASS);
	1719	result.enable(Capability.MISSING_CLASS_VALUES);
	1720
	1721	// instances
	1722	result.setMinimumNumberInstances(m_Folds);
	1723
	1724	return result;
	1725	}
	1726
	1727	/**
	1728	* Builds the FURIA rule-based model
	1729	*
	1730	* @param instances the training data
	1731	* @throws Exception if classifier can't be built successfully
	1732	*/
	1733	public void buildClassifier(Instances instances) throws Exception {
	1734	// can classifier handle the data?
	1735	getCapabilities().testWithFail(instances);
	1736
	1737	// remove instances with missing class
	1738	instances = new Instances(instances);
	1739	instances.deleteWithMissingClass();
	1740
	1741	// Learn the apriori distribution for later
	1742	aprioriDistribution = new double[instances.classAttribute().numValues()];
	1743	boolean allWeightsAreOne = true;
	1744	for (int i = 0 ; i < instances.numInstances(); i++){
	1745	aprioriDistribution[(int)instances.instance(i).classValue()]+=instances.instance(i).weight();
	1746	if (allWeightsAreOne && instances.instance(i).weight() != 1.0){
	1747	allWeightsAreOne = false;
	1748	break;
	1749	}
	1750	}
	1751
	1752
	1753	m_Random = instances.getRandomNumberGenerator(m_Seed);
	1754	m_Total = RuleStats.numAllConditions(instances);
	1755	if(m_Debug)
	1756	System.err.println("Number of all possible conditions = "+m_Total);
	1757
	1758	Instances data = new Instances(instances);
	1759
	1760	m_Class = data.classAttribute();
	1761	m_Ruleset = new FastVector();
	1762	m_RulesetStats = new FastVector();
	1763	m_Distributions = new FastVector();
	1764
	1765
	1766	// Learn a rule set for each single class
	1767	oneClass:
	1768	for(int y=0; y < data.numClasses(); y++){ // For each class
	1769
	1770	double classIndex = (double)y;
	1771	if(m_Debug){
	1772	int ci = (int)classIndex;
	1773	System.err.println("\n\nClass "+m_Class.value(ci)+"("+ci+"): "
	1774	+ aprioriDistribution[y] + "instances\n"+
	1775	"=====================================\n");
	1776	}
	1777
	1778	if(Utils.eq(aprioriDistribution[y],0.0)) // No data for this class
	1779	continue oneClass;
	1780
	1781	// The expected FP/err is the proportion of the class
	1782	double expFPRate = (aprioriDistribution[y] / Utils.sum(aprioriDistribution));
	1783
	1784
	1785	double classYWeights = 0, totalWeights = 0;
	1786	for(int j=0; j < data.numInstances(); j++){
	1787	Instance datum = data.instance(j);
	1788	totalWeights += datum.weight();
	1789	if((int)datum.classValue() == y){
	1790	classYWeights += datum.weight();
	1791	}
	1792	}
	1793
	1794	// DL of default rule, no theory DL, only data DL
	1795	double defDL;
	1796	if(classYWeights > 0)
	1797	defDL = RuleStats.dataDL(expFPRate,
	1798	0.0,
	1799	totalWeights,
	1800	0.0,
	1801	classYWeights);
	1802	else
	1803	continue oneClass; // Subsumed by previous rules
	1804
	1805
	1806
	1807	if(Double.isNaN(defDL) \|\| Double.isInfinite(defDL))
	1808	throw new Exception("Should never happen: "+
	1809	"defDL NaN or infinite!");
	1810	if(m_Debug)
	1811	System.err.println("The default DL = "+defDL);
	1812
	1813	rulesetForOneClass(expFPRate, data, classIndex, defDL);
	1814	}
	1815
	1816	// Remove redundant antecedents
	1817	for(int z=0; z < m_Ruleset.size(); z++){
	1818	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
	1819	for(int j = 0; j < rule.m_Antds.size(); j++){
	1820	Antd outerAntd = (Antd)rule.m_Antds.elementAt(j);
	1821	for (int k = j+1; k < rule.m_Antds.size(); k++){
	1822	Antd innerAntd = (Antd)rule.m_Antds.elementAt(k);
	1823	if (outerAntd.att.index() == innerAntd.att.index() && outerAntd.value==innerAntd.value){
	1824	rule.m_Antds.setElementAt(rule.m_Antds.elementAt(k), j);
	1825	rule.m_Antds.removeElementAt(k--);
	1826	}
	1827	}
	1828	}
	1829	}
	1830
	1831
	1832	// Fuzzify all rules
	1833	for(int z=0; z < m_RulesetStats.size(); z++){
	1834	RuleStats oneClass = (RuleStats)m_RulesetStats.elementAt(z);
	1835	for(int xyz=0; xyz < oneClass.getRulesetSize(); xyz++){
	1836	RipperRule rule = (RipperRule)((FastVector)oneClass.getRuleset()).elementAt(xyz);
	1837
	1838	// do the fuzzification for all known antecedents
	1839	rule.fuzzify(data, allWeightsAreOne);
	1840
	1841	double[] classDist = oneClass.getDistributions(xyz);
	1842	// Check for sum=0, because otherwise it does not work
	1843	if (Utils.sum(classDist)>0) Utils.normalize(classDist);
	1844	if(classDist != null)
	1845	m_Distributions.addElement(classDist);
	1846	}
	1847	}
	1848
	1849
	1850	// if there was some problem during fuzzification, set the support bound
	1851	// to the trivial fuzzification position
	1852	for(int z=0; z < m_Ruleset.size(); z++){
	1853	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
	1854	for(int j = 0; j < rule.m_Antds.size(); j++){
	1855	Antd antd = (Antd)rule.m_Antds.elementAt(j);
	1856	if (antd instanceof NumericAntd) {
	1857	NumericAntd numAntd = (NumericAntd) antd;
	1858
	1859
	1860	if (!numAntd.fuzzyYet){
	1861	for (int i = 0; i < data.numInstances(); i++){
	1862	if ((numAntd.value == 1 &&
	1863	numAntd.splitPoint > data.instance(i).value(numAntd.att.index()) &&
	1864	(numAntd.supportBound < data.instance(i).value(numAntd.att.index()) \|\|
	1865	!numAntd.fuzzyYet)
	1866	)
	1867	\|\|
	1868	(numAntd.value == 0 &&
	1869	numAntd.splitPoint < data.instance(i).value(numAntd.att.index()) &&
	1870	(numAntd.supportBound > data.instance(i).value(numAntd.att.index()) \|\|
	1871	!numAntd.fuzzyYet)
	1872	)
	1873	){
	1874	numAntd.supportBound = data.instance(i).value(numAntd.att.index());
	1875	numAntd.fuzzyYet = true;
	1876	}
	1877	}
	1878
	1879	}
	1880	}
	1881	}
	1882	}
	1883
	1884	//Determine confidences
	1885	for(int z=0; z < m_Ruleset.size(); z++){
	1886	RipperRule rule = (RipperRule)m_Ruleset.elementAt(z);
	1887	rule.calculateConfidences(data);
	1888	}
	1889	}
	1890
	1891	/**
	1892	* Classify the test instance with the rule learner and provide
	1893	* the class distributions
	1894	*
	1895	* @param datum the instance to be classified
	1896	* @return the distribution
	1897	* @throws Exception
	1898	*/
	1899
	1900	public double[] distributionForInstance(Instance datum) throws Exception{
	1901	//test for multiple overlap of rules
	1902	double[] rulesCoveringForEachClass = new double[datum.numClasses()];
	1903	for(int i=0; i < m_Ruleset.size(); i++){
	1904	RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
	1905
	1906	/* In case that one class does not contain any instances (e.g. in UCI-dataset glass),
	1907	* a default rule assigns all instances to the other class. Such a rule may be ignored here.
	1908	*/
	1909	if (!rule.hasAntds())
	1910	continue;
	1911
	1912
	1913	// Calculate the maximum degree of coverage
	1914	if(rule.covers(datum)){
	1915	rulesCoveringForEachClass[(int)rule.m_Consequent] += rule.coverageDegree(datum) * rule.getConfidence();
	1916	}
	1917
	1918	}
	1919
	1920
	1921	// If no rule covered the example, then maybe start the rule stretching
	1922	if (Utils.sum(rulesCoveringForEachClass)==0){
	1923
	1924	// If rule stretching is not allowed,
	1925	// return either the apriori prediction
	1926	if (m_uncovAction == UNCOVACTION_APRIORI){
	1927	rulesCoveringForEachClass = aprioriDistribution;
	1928	if (Utils.sum(rulesCoveringForEachClass)>0)
	1929	Utils.normalize(rulesCoveringForEachClass);
	1930	return rulesCoveringForEachClass;
	1931	}
	1932	// or abstain from that decision at all.
	1933	if (m_uncovAction == UNCOVACTION_REJECT)
	1934	return rulesCoveringForEachClass;
	1935
	1936	// Copy the ruleset as backup
	1937	FastVector origRuleset = (FastVector) m_Ruleset.copyElements();
	1938
	1939	// Find for every rule the first antecedent that does not
	1940	// cover the given instance.
	1941	rulesCoveringForEachClass = new double[rulesCoveringForEachClass.length];
	1942	for(int i=0; i < m_Ruleset.size(); i++){
	1943	RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
	1944	double numAntdsBefore = rule.m_Antds.size();
	1945
	1946	int firstAntdToDelete = Integer.MAX_VALUE;
	1947	for (int j = 0; j < rule.m_Antds.size(); j++){
	1948	if (((Antd)rule.m_Antds.elementAt(j)).covers(datum)==0){
	1949	firstAntdToDelete = j;
	1950	break;
	1951	}
	1952	}
	1953
	1954	// Prune antecedent such that it covers the instance
	1955	for (int j = firstAntdToDelete; j < rule.m_Antds.size(); j++){
	1956	rule.m_Antds.removeElementAt(j--);
	1957	}
	1958	double numAntdsAfter = rule.m_Antds.size();
	1959
	1960	// Empty rules shall not vote here
	1961	if (!rule.hasAntds())
	1962	continue;
	1963
	1964	// Calculate the maximum degree of coverage and weight the rule
	1965	// by its confidence and the fraction of antecedents left after
	1966	// rule stretching
	1967	double secondWeight = (numAntdsAfter+1)/(numAntdsBefore+2) ;
	1968	if (rule.getConfidence() secondWeightrule.coverageDegree(datum) >= rulesCoveringForEachClass[(int)rule.getConsequent()]){
	1969	rulesCoveringForEachClass[(int)rule.getConsequent()] = rule.getConfidence()secondWeightrule.coverageDegree(datum);
	1970	}
	1971	}
	1972
	1973	// Reestablish original ruleset
	1974	m_Ruleset = origRuleset;
	1975	}
	1976
	1977	//check for conflicts
	1978	double[] maxClasses = new double[rulesCoveringForEachClass.length];
	1979	for (int i = 0; i < rulesCoveringForEachClass.length; i++){
	1980	if (rulesCoveringForEachClass[Utils.maxIndex(rulesCoveringForEachClass)] ==
	1981	rulesCoveringForEachClass[i] && rulesCoveringForEachClass[i]>0)
	1982	maxClasses[i] = 1;
	1983	}
	1984
	1985	//If there is a conflict, resolve it using the apriori distribution
	1986	if (Utils.sum(maxClasses)>0){
	1987	for (int i = 0; i < maxClasses.length; i++){
	1988	if (maxClasses[i] > 0 && aprioriDistribution[i] != rulesCoveringForEachClass[Utils.maxIndex(rulesCoveringForEachClass)])
	1989	rulesCoveringForEachClass[i] -= 0.00001;
	1990	}
	1991	}
	1992
	1993	// If no stretched rule was able to cover the instance,
	1994	// then fall back to the apriori distribution
	1995	if (Utils.sum(rulesCoveringForEachClass)==0){
	1996	rulesCoveringForEachClass = aprioriDistribution;
	1997	}
	1998
	1999
	2000	if (Utils.sum(rulesCoveringForEachClass)>0)
	2001	Utils.normalize(rulesCoveringForEachClass);
	2002
	2003	return rulesCoveringForEachClass;
	2004
	2005	}
	2006
	2007
	2008	/** Build a ruleset for the given class according to the given data
	2009	*
	2010	* @param expFPRate the expected FP/(FP+FN) used in DL calculation
	2011	* @param data the given data
	2012	* @param classIndex the given class index
	2013	* @param defDL the default DL in the data
	2014	* @throws Exception if the ruleset can be built properly
	2015	*/
	2016	protected Instances rulesetForOneClass(double expFPRate,
	2017	Instances data,
	2018	double classIndex,
	2019	double defDL)
	2020	throws Exception {
	2021
	2022	Instances newData = data, growData, pruneData;
	2023	boolean stop = false;
	2024	FastVector ruleset = new FastVector();
	2025
	2026	double dl = defDL, minDL = defDL;
	2027	RuleStats rstats = null;
	2028	double[] rst;
	2029
	2030	// Check whether data have positive examples
	2031	boolean defHasPositive = true; // No longer used
	2032	boolean hasPositive = defHasPositive;
	2033
	2034	/******************** Building stage *********************/
	2035	if(m_Debug)
	2036	System.err.println("\n* Building stage *");
	2037
	2038
	2039	while((!stop) && hasPositive){ // Generate new rules until
	2040	// stopping criteria met
	2041	RipperRule oneRule;
	2042
	2043	oneRule = new RipperRule();
	2044	oneRule.setConsequent(classIndex); // Must set first
	2045	if(m_Debug)
	2046	System.err.println("\nNo pruning: growing a rule ...");
	2047	oneRule.grow(newData); // Build the rule
	2048	if(m_Debug)
	2049	System.err.println("No pruning: one rule found:\n"+
	2050	oneRule.toString(m_Class));
	2051
	2052
	2053	// Compute the DL of this ruleset
	2054	if(rstats == null){ // First rule
	2055	rstats = new RuleStats();
	2056	rstats.setNumAllConds(m_Total);
	2057	rstats.setData(newData);
	2058	}
	2059
	2060	rstats.addAndUpdate(oneRule);
	2061	int last = rstats.getRuleset().size()-1; // Index of last rule
	2062	dl += rstats.relativeDL(last, expFPRate, m_CheckErr);
	2063
	2064	if(Double.isNaN(dl) \|\| Double.isInfinite(dl))
	2065	throw new Exception("Should never happen: dl in "+
	2066	"building stage NaN or infinite!");
	2067	if(m_Debug)
	2068	System.err.println("Before optimization("+last+
	2069	"): the dl = "+dl+" \| best: "+minDL);
	2070
	2071	if(dl < minDL)
	2072	minDL = dl; // The best dl so far
	2073
	2074	rst = rstats.getSimpleStats(last);
	2075	if(m_Debug)
	2076	System.err.println("The rule covers: "+rst[0]+
	2077	" \| pos = " + rst[2] +
	2078	" \| neg = " + rst[4]+
	2079	"\nThe rule doesn't cover: "+rst[1]+
	2080	" \| pos = " + rst[5]);
	2081
	2082	stop = checkStop(rst, minDL, dl);
	2083
	2084	if(!stop){
	2085	ruleset.addElement(oneRule); // Accepted
	2086	newData = rstats.getFiltered(last)[1];// Data not covered
	2087	hasPositive = Utils.gr(rst[5], 0.0); // Positives remaining?
	2088	if(m_Debug)
	2089	System.err.println("One rule added: has positive? "
	2090	+hasPositive);
	2091	}
	2092	else{
	2093	if(m_Debug)
	2094	System.err.println("Quit rule");
	2095	rstats.removeLast(); // Remove last to be re-used
	2096	}
	2097	}// while !stop
	2098
	2099
	2100	/****************** Optimization stage *****************/
	2101
	2102	RuleStats finalRulesetStat = null;
	2103	for(int z=0; z < m_Optimizations; z++){
	2104	if(m_Debug)
	2105	System.err.println("\n*** Optimization: run #"
	2106	+z+" ***");
	2107
	2108	newData = data;
	2109	finalRulesetStat = new RuleStats();
	2110	finalRulesetStat.setData(newData);
	2111	finalRulesetStat.setNumAllConds(m_Total);
	2112	int position=0;
	2113	stop = false;
	2114	boolean isResidual = false;
	2115	hasPositive = defHasPositive;
	2116	dl = minDL = defDL;
	2117
	2118	oneRule:
	2119	while(!stop && hasPositive){
	2120
	2121	isResidual = (position>=ruleset.size()); // Cover residual positive examples
	2122	// Re-do shuffling and stratification
	2123	//newData.randomize(m_Random);
	2124	newData = RuleStats.stratify(newData, m_Folds, m_Random);
	2125	Instances[] part = RuleStats.partition(newData, m_Folds);
	2126	growData=part[0];
	2127	pruneData=part[1];
	2128	//growData=newData.trainCV(m_Folds, m_Folds-1);
	2129	//pruneData=newData.testCV(m_Folds, m_Folds-1);
	2130	RipperRule finalRule;
	2131
	2132	if(m_Debug)
	2133	System.err.println("\nRule #"+position +
	2134	"\| isResidual?" + isResidual+
	2135	"\| data size: "+newData.sumOfWeights());
	2136
	2137	if(isResidual){
	2138	RipperRule newRule = new RipperRule();
	2139	newRule.setConsequent(classIndex);
	2140	if(m_Debug)
	2141	System.err.println("\nGrowing and pruning"+
	2142	" a new rule ...");
	2143	newRule.grow(newData);
	2144	finalRule = newRule;
	2145	if(m_Debug)
	2146	System.err.println("\nNew rule found: "+
	2147	newRule.toString(m_Class));
	2148	}
	2149	else{
	2150	RipperRule oldRule = (RipperRule)ruleset.elementAt(position);
	2151	boolean covers = false;
	2152	// Test coverage of the next old rule
	2153	for(int i=0; i<newData.numInstances(); i++)
	2154	if(oldRule.covers(newData.instance(i))){
	2155	covers = true;
	2156	break;
	2157	}
	2158
	2159	if(!covers){// Null coverage, no variants can be generated
	2160	finalRulesetStat.addAndUpdate(oldRule);
	2161	position++;
	2162	continue oneRule;
	2163	}
	2164
	2165	// 2 variants
	2166	if(m_Debug)
	2167	System.err.println("\nGrowing and pruning"+
	2168	" Replace ...");
	2169	RipperRule replace = new RipperRule();
	2170	replace.setConsequent(classIndex);
	2171	replace.grow(growData);
	2172
	2173	// Remove the pruning data covered by the following
	2174	// rules, then simply compute the error rate of the
	2175	// current rule to prune it. According to Ripper,
	2176	// it's equivalent to computing the error of the
	2177	// whole ruleset -- is it true?
	2178	pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);
	2179	replace.prune(pruneData, true);
	2180
	2181	if(m_Debug)
	2182	System.err.println("\nGrowing and pruning"+
	2183	" Revision ...");
	2184	RipperRule revision = (RipperRule)oldRule.copy();
	2185
	2186	// For revision, first rm the data covered by the old rule
	2187	Instances newGrowData = new Instances(growData, 0);
	2188	for(int b=0; b<growData.numInstances(); b++){
	2189	Instance inst = growData.instance(b);
	2190	if(revision.covers(inst))
	2191	newGrowData.add(inst);
	2192	}
	2193	revision.grow(newGrowData);
	2194	revision.prune(pruneData, true);
	2195
	2196	double[][] prevRuleStats = new double[position][6];
	2197	for(int c=0; c < position; c++)
	2198	prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);
	2199
	2200	// Now compare the relative DL of variants
	2201	FastVector tempRules = (FastVector)ruleset.copyElements();
	2202	tempRules.setElementAt(replace, position);
	2203
	2204	RuleStats repStat = new RuleStats(data, tempRules);
	2205	repStat.setNumAllConds(m_Total);
	2206	repStat.countData(position, newData, prevRuleStats);
	2207	//repStat.countData();
	2208	rst = repStat.getSimpleStats(position);
	2209	if(m_Debug)
	2210	System.err.println("Replace rule covers: "+rst[0]+
	2211	" \| pos = " + rst[2] +
	2212	" \| neg = " + rst[4]+
	2213	"\nThe rule doesn't cover: "+rst[1]+
	2214	" \| pos = " + rst[5]);
	2215
	2216	double repDL = repStat.relativeDL(position, expFPRate,
	2217	m_CheckErr);
	2218
	2219	if(m_Debug)
	2220	System.err.println("\nReplace: "+
	2221	replace.toString(m_Class)
	2222	+" \|dl = "+repDL);
	2223
	2224	if(Double.isNaN(repDL) \|\| Double.isInfinite(repDL))
	2225	throw new Exception("Should never happen: repDL"+
	2226	"in optmz. stage NaN or "+
	2227	"infinite!");
	2228
	2229	tempRules.setElementAt(revision, position);
	2230	RuleStats revStat = new RuleStats(data, tempRules);
	2231	revStat.setNumAllConds(m_Total);
	2232	revStat.countData(position, newData, prevRuleStats);
	2233	//revStat.countData();
	2234	double revDL = revStat.relativeDL(position, expFPRate,
	2235	m_CheckErr);
	2236
	2237	if(m_Debug)
	2238	System.err.println("Revision: "
	2239	+ revision.toString(m_Class)
	2240	+" \|dl = "+revDL);
	2241
	2242	if(Double.isNaN(revDL) \|\| Double.isInfinite(revDL))
	2243	throw new Exception("Should never happen: revDL"+
	2244	"in optmz. stage NaN or "+
	2245	"infinite!");
	2246
	2247	rstats = new RuleStats(data, ruleset);
	2248	rstats.setNumAllConds(m_Total);
	2249	rstats.countData(position, newData, prevRuleStats);
	2250	//rstats.countData();
	2251	double oldDL = rstats.relativeDL(position, expFPRate,
	2252	m_CheckErr);
	2253
	2254	if(Double.isNaN(oldDL) \|\| Double.isInfinite(oldDL))
	2255	throw new Exception("Should never happen: oldDL"+
	2256	"in optmz. stage NaN or "+
	2257	"infinite!");
	2258	if(m_Debug)
	2259	System.err.println("Old rule: "+
	2260	oldRule.toString(m_Class)
	2261	+" \|dl = "+oldDL);
	2262
	2263	if(m_Debug)
	2264	System.err.println("\nrepDL: "+repDL+
	2265	"\nrevDL: "+revDL+
	2266	"\noldDL: "+oldDL);
	2267
	2268	if((oldDL <= revDL) && (oldDL <= repDL))
	2269	finalRule = oldRule; // Old the best
	2270	else if(revDL <= repDL)
	2271	finalRule = revision; // Revision the best
	2272	else
	2273	finalRule = replace; // Replace the best
	2274	}
	2275
	2276	finalRulesetStat.addAndUpdate(finalRule);
	2277	rst = finalRulesetStat.getSimpleStats(position);
	2278
	2279	if(isResidual){
	2280	dl += finalRulesetStat.relativeDL(position,
	2281	expFPRate,
	2282	m_CheckErr);
	2283
	2284	if(m_Debug)
	2285	System.err.println("After optimization: the dl"
	2286	+"="+dl+" \| best: "+minDL);
	2287
	2288	if(dl < minDL)
	2289	minDL = dl; // The best dl so far
	2290
	2291	stop = checkStop(rst, minDL, dl);
	2292	if(!stop)
	2293	ruleset.addElement(finalRule); // Accepted
	2294	else{
	2295	finalRulesetStat.removeLast(); // Remove last to be re-used
	2296	position--;
	2297	}
	2298	}
	2299	else
	2300	ruleset.setElementAt(finalRule, position); // Accepted
	2301
	2302	if(m_Debug){
	2303	System.err.println("The rule covers: "+rst[0]+
	2304	" \| pos = " + rst[2] +
	2305	" \| neg = " + rst[4]+
	2306	"\nThe rule doesn't cover: "+rst[1]+
	2307	" \| pos = " + rst[5]);
	2308	System.err.println("\nRuleset so far: ");
	2309	for(int x=0; x<ruleset.size(); x++)
	2310	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
	2311	System.err.println();
	2312	}
	2313
	2314	//Data not covered
	2315	if(finalRulesetStat.getRulesetSize() > 0)// If any rules
	2316	newData = finalRulesetStat.getFiltered(position)[1];
	2317	hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining?
	2318	position++;
	2319	} // while !stop && hasPositive
	2320
	2321	if(ruleset.size() > (position+1)){ // Hasn't gone through yet
	2322	for(int k=position+1; k<ruleset.size(); k++)
	2323	finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k));
	2324	}
	2325	if(m_Debug)
	2326	System.err.println("\nDeleting rules to decrease"+
	2327	" DL of the whole ruleset ...");
	2328	finalRulesetStat.reduceDL(expFPRate, m_CheckErr);
	2329
	2330	if(m_Debug){
	2331	int del = ruleset.size() -
	2332	finalRulesetStat.getRulesetSize();
	2333	System.err.println(del+" rules are deleted"+
	2334	" after DL reduction procedure");
	2335	}
	2336	ruleset = finalRulesetStat.getRuleset();
	2337	rstats = finalRulesetStat;
	2338
	2339	} // For each run of optimization
	2340
	2341	// Concatenate the ruleset for this class to the whole ruleset
	2342	if(m_Debug){
	2343	System.err.println("\nFinal ruleset: ");
	2344	for(int x=0; x<ruleset.size(); x++)
	2345	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
	2346	System.err.println();
	2347	}
	2348
	2349
	2350	m_Ruleset.appendElements(ruleset);
	2351	m_RulesetStats.addElement(rstats);
	2352
	2353	return null;
	2354	}
	2355
	2356	/**
	2357	* Check whether the stopping criterion meets
	2358	*
	2359	* @param rst the statistic of the ruleset
	2360	* @param minDL the min description length so far
	2361	* @param dl the current description length of the ruleset
	2362	* @return true if stop criterion meets, false otherwise
	2363	*/
	2364	private boolean checkStop(double[] rst, double minDL, double dl){
	2365
	2366
	2367	if(dl > minDL+MAX_DL_SURPLUS){
	2368	if(m_Debug)
	2369	System.err.println("DL too large: "+dl+" \| "+minDL);
	2370	return true;
	2371	}
	2372	else
	2373	if(!Utils.gr(rst[2], 0.0)){// Covered positives
	2374	if(m_Debug)
	2375	System.err.println("Too few positives.");
	2376	return true;
	2377	}
	2378	else if((rst[4]/rst[0]) >= 0.5){// Err rate
	2379	if(m_CheckErr){
	2380	if(m_Debug)
	2381	System.err.println("Error too large: "+
	2382	rst[4] + "/" + rst[0]);
	2383	return true;
	2384	}
	2385	else
	2386	return false;
	2387	}
	2388	else{// Not stops
	2389	if(m_Debug)
	2390	System.err.println("Continue.");
	2391	return false;
	2392	}
	2393	}
	2394
	2395	/**
	2396	* Prints the all the rules of the rule learner.
	2397	*
	2398	* @return a textual description of the classifier
	2399	*/
	2400	public String toString() {
	2401	if (m_Ruleset == null)
	2402	return "FURIA: No model built yet.";
	2403
	2404	StringBuffer sb = new StringBuffer("FURIA rules:\n"+
	2405	"===========\n\n");
	2406	for(int j=0; j<m_RulesetStats.size(); j++){
	2407	RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j);
	2408	FastVector rules = rs.getRuleset();
	2409	for(int k=0; k<rules.size(); k++){
	2410	sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class)
	2411	+ " (CF = " + Math.round(100.0*((RipperRule)rules.elementAt(k)).getConfidence())/100.0 +")\n");
	2412	}
	2413	}
	2414	if(m_Debug){
	2415	System.err.println("Inside m_Ruleset");
	2416	for(int i=0; i<m_Ruleset.size(); i++)
	2417	System.err.println(((RipperRule)m_Ruleset.elementAt(i)).toString(m_Class));
	2418	}
	2419	sb.append("\nNumber of Rules : "
	2420	+ m_Ruleset.size() + "\n");
	2421
	2422
	2423
	2424
	2425	return sb.toString();
	2426	}
	2427
	2428	/**
	2429	* Main method.
	2430	*
	2431	* @param args the options for the classifier
	2432	* @throws Exception
	2433	*/
	2434	public static void main(String[] args) throws Exception {
	2435	runClassifier(new FURIA(), args);
	2436	}
	2437
	2438	/**
	2439	*
	2440	*/
	2441	public String getRevision() {
	2442	return "$Revision: 5964 $";
	2443	}
	2444	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: